This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mock Args class, to be replaced with argparse or equivalent later
class Args:
    """Lightweight stand-in for an ``argparse.Namespace``.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name, e.g. ``Args(lr=0.1).lr`` evaluates to ``0.1``.
    """

    def __init__(self, **attrs):
        # Copy the keyword arguments straight into the instance namespace.
        self.__dict__.update(attrs)

    def __repr__(self):
        # Mirror the constructor call so instances are readable when printed.
        fields = ", ".join(
            f"{key}={value!r}" for key, value in self.__dict__.items()
        )
        return f"{type(self).__name__}({fields})"
args = Args(**{ | |
"input_basepath" : "/home/nay/git-repos/w2v2-10min-exps/data/MASS-1h", | |
"train_tsv" : "train1h.tsv", | |
"valid_tsv" : "dev.tsv", | |
"output_basepath" : "/home/nay/librispeech-1h-fairseq/manifest-MASS-1h" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NVIDIA official container with CUDA 11.7 (note pytorch-cuda=11.7 in PyTorch install below) | |
# See https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html | |
FROM nvcr.io/nvidia/pytorch:22.05-py3 | |
# Update container PyTorch from 1.9 to 1.13.1 | |
RUN conda install pytorch==1.13.1 \ | |
torchvision==0.14.1 \ | |
torchaudio==0.13.1 \ | |
pytorch-cuda=11.7 \ | |
-c pytorch \ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(httr) | |
library(purrr) | |
library(tibble) | |
# See also https://bookdown.org/paul/apis_for_social_scientists/reddit-api.html | |
# Listing endpoint for the newest r/mentalhealth posts, returned as JSON
# (query string asks for at most 100 items).
url <- 'https://www.reddit.com/r/mentalhealth/new.json?t=day&limit=100'
# Issue the GET request with a custom User-Agent header identifying this script.
response <- GET(url, user_agent('Extracting data from Reddit'))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from multiprocessing import Pool | |
import tqdm | |
import time | |
def _foo(my_number, delay=1):
    """Return ``my_number`` squared after sleeping for ``delay`` seconds.

    Args:
        my_number: Value to square; anything supporting ``*`` with itself.
        delay: Seconds to sleep before returning. Defaults to 1, the pause
            the function has always used; pass 0 to skip it.

    Returns:
        ``my_number * my_number``.
    """
    square = my_number * my_number
    # NOTE(review): the sleep presumably simulates slow work so the
    # surrounding pool/progress-bar demo has something to show — confirm.
    time.sleep(delay)
    return square
if __name__ == '__main__': |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
# Copyright 2021 The HuggingFace Inc. team. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torchaudio | |
from torchaudio.models.decoder import ctc_decoder | |
from typing import List | |
# Pretrained wav2vec 2.0 ASR pipeline bundle shipped with torchaudio.
bundle = torchaudio.pipelines.WAV2VEC2_ASR_LARGE_10M
# Build the acoustic model described by the bundle.
acoustic_model = bundle.get_model()
# Move the model's parameters to the GPU; this requires a CUDA device.
acoustic_model.to('cuda')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ms_to_hms(start_ms: int) -> str: | |
import math | |
""" | |
Convert milliseconds to hours-minutes-seconds string format to use within identifiers | |
e.g. 3990060 ms -> '01h06m30.060' | |
""" | |
s, ms = divmod(start_ms, 1000) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from collections import Counter | |
from tqdm.contrib.concurrent import process_map | |
def get_vocab(texts_list, ids_list=None): | |
def sum_counters(counter_list): | |
''' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Abstract class ('phoneme') | |
class Consonant: | |
# Class attributes assigned when initialised ('realised') | |
def __init__(self, place_of_art, manner_of_art): | |
self.place_of_art = place_of_art | |
self.manner_of_art = manner_of_art | |
def lenite(self): | |
if self.manner_of_art == 'stop': | |
self.manner_of_art = 'fricative' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adapted from https://stackoverflow.com/questions/12130883/r-expand-grid-function-in-python | |
# | |
# Usage: | |
# expand_grid([0, 1], [2,3,4]) | |
# | |
# Output: | |
# array([[0, 2], | |
# [1, 2], | |
# [0, 3], | |
# [1, 3], |