This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# checked against python 3.7.3, pandas 0.24.2, s3fs 0.4.2
import tarfile | |
import io | |
import s3fs | |
import pandas as pd | |
# Location of the gzipped tar archive in S3, and of the CSV member inside it.
tar_path = "s3://my-bucket/debug.tar.gz"  # path in s3
metadata_path = "debug/metadata.csv"  # path inside of the tar file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from hashlib import md5 | |
def text_to_hash(text: str) -> str:
    """Return the hexadecimal MD5 digest of *text*.

    The text is encoded as UTF-8 before hashing, so equal strings always
    produce the same 32-character lowercase hex digest.

    NOTE: MD5 is not collision-resistant; do not rely on this digest for
    security-sensitive purposes.
    """
    return md5(text.encode("utf8")).hexdigest()
def add_hash(column_name="document"): | |
""" | |
Decorator. Wraps a function that returns a dataframe, must have column_name in columns. | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import soundfile as sf | |
from pydub import AudioSegment | |
# we want to convert source, an .mp3 file, into dest, a .wav file
source = "./recordings/test.mp3"
dest = "./recordings/test.wav"
# conversion - check!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
def get_timestamp(): | |
""" | |
Print the date in m/d/y format, GMT | |
>>> get_timestamp() | |
'3_31_2020' | |
""" | |
t = time.gmtime() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import pickle | |
def openJSON(path): | |
""" | |
Safely opens json file at 'path' | |
""" | |
with open(path, 'r') as File: | |
data = json.load(File) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
# Override matplotlib's global defaults: a (15, 15) figure size and
# 'x-large' text for the legend, axis labels, titles and tick labels.
params = {
    'legend.fontsize': 'x-large',
    'figure.figsize': (15, 15),
    'axes.labelsize': 'x-large',
    'axes.titlesize': 'x-large',
    'xtick.labelsize': 'x-large',
    'ytick.labelsize': 'x-large',
}
# Applies to every figure created after this point.
plt.rcParams.update(params)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import dill | |
# Register the type names 'SliceType' and 'ObjectType' in dill's private
# reverse type map, pointing them at the corresponding builtins.
# NOTE(review): presumably a workaround so pickles written by an older
# dill/Python version can be loaded -- confirm against the dill version in use.
dill._dill._reverse_typemap['SliceType'] = slice
dill._dill._reverse_typemap['ObjectType'] = object
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
git clone https://[insert username]:[insert password]@github.com/[insert organisation name]/[insert repo name].git
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
def get_word_counts(document: str) -> pd.DataFrame: | |
""" | |
Turns a document into a dataframe of word, counts | |
Use preprocessing/lowercasing before this step for best results. | |
If passing many documents, use document = '\n'.join(iterable_of_documents) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import shutil | |
def flatten_directory(directory, delete_after=False): | |
""" | |
Flattens all folders in directory, deleting the empty folders after. | |
**WARNING** | |
This code WILL DELETE YOUR FILES | |
if used naively. Seriously. |
NewerOlder