# Clone your fork over SSH (replace the placeholders with your account and repository).
git clone git@github.com:YOUR-USERNAME/YOUR-FORKED-REPO.git
cd into/cloned/fork-repo
# Register the original repository as "upstream". GitHub no longer serves the
# unauthenticated git:// protocol, so the HTTPS URL is used instead.
git remote add upstream https://github.com/ORIGINAL-DEV-USERNAME/REPO-YOU-FORKED-FROM.git
git fetch upstream
#!/bin/bash
# Install the packages apt needs in order to use a repository over HTTPS.
sudo apt-get install -y \
    apt-transport-https \
    ca-certificates \
    curl \
    software-properties-common
# Download the GPG key published at download.docker.com and register it with apt.
# NOTE(review): apt-key is deprecated on recent Debian/Ubuntu releases; consider
# switching to a keyring file under /etc/apt/keyrings.
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
# Print the key matching this fingerprint suffix so it can be checked by eye.
sudo apt-key fingerprint 0EBFCD88
sudo add-apt-repository \ | |
"deb [arch=amd64] https://download.docker.com/linux/ubuntu \ |
def seed_everything(seed: int) -> None:
    """Seed the random number generators used by this snippet.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (via ``torch.manual_seed`` and ``torch.cuda.manual_seed``), and stores the
    seed in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Value handed to every generator.
    """
    import os
    import random

    import numpy as np
    import torch

    random.seed(seed)
    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this
    # assignment only influences processes launched afterwards, not the
    # hashing of the interpreter that is already running.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
# From https://stackoverflow.com/questions/23586510/return-multiple-columns-from-pandas-apply
def sizes(s):
    """Format a byte count as kilobyte, megabyte and gigabyte strings.

    Args:
        s: Size in bytes (any number that supports true division).

    Returns:
        A 3-tuple of strings ``('<n> KB', '<n> MB', '<n> GB')``, each value
        rendered with one decimal place and the current locale's digit
        grouping.
    """
    # locale.format() was removed in Python 3.12; format_string() is the
    # supported equivalent and takes the same arguments used here.
    return tuple(
        locale.format_string("%.1f", s / 1024.0 ** power, grouping=True) + unit
        for power, unit in ((1, ' KB'), (2, ' MB'), (3, ' GB'))
    )
# sizes() returns one (KB, MB, GB) tuple per row; zip(*...) transposes those
# tuples into three sequences so that each one can be assigned to its own column.
df_test['size_kb'], df_test['size_mb'], df_test['size_gb'] = zip(*df_test['size'].apply(sizes))
# Columns of the pivot table before flattening (two-level MultiIndex).
pivot.columns
# MultiIndex([('mean', 'is_suitable'),
#             ('size', 'is_suitable')],
#            )

# Join the levels of every column tuple with "_" to obtain flat names.
pivot.columns.map('_'.join)
# Index(['mean_is_suitable', 'size_is_suitable'], dtype='object')
def group_others(serie: pd.Series,
                 min_threshold: int) -> pd.Series:
    """
    Group rarely occurring categorical values under the category "OTHERS".

    Values that appear fewer than ``min_threshold`` times are replaced, which
    reduces the number of distinct categories (mitigating the curse of
    dimensionality and thus helping to avoid overfitting).

    Args:
        serie: Series of categorical values.
        min_threshold: Minimum number of occurrences a value needs in order
            to keep its own category.

    Returns:
        A new Series in which every under-represented value is "OTHERS".
        The input is not modified. Missing values are not counted by
        ``value_counts`` and are therefore left as they are.
    """
    # Count once and reuse the result instead of calling value_counts twice.
    counts = serie.value_counts()
    other_group = list(counts[counts < min_threshold].index)
    # Keep values outside the rare group; substitute the label everywhere else.
    return serie.where(~serie.isin(other_group), "OTHERS")
import pylab as plt | |
plt.plot([1,2,3,10], [1,2,3,4]) | |
%matplot plt # Include this in the same cell as the plot |
def diversity_percentage(df, columns): | |
""" | |
This function returns the number of different elements in each column as a percentage of the total elements in the group. | |
A low value indicates there are many repeated elements. | |
Example 1: a value of 0 indicates all values are the same. | |
Example 2: a value of 100 indicates all values are different. | |
""" | |
diversity = dict() | |
for col in columns: |
def plot_nulls(dataframe):
    """Draw a horizontal bar chart of the percentage of null values per column.

    Args:
        dataframe: Object exposing ``isnull()`` and ``len()`` — presumably a
            pandas DataFrame; confirm against callers.
    """
    def null_perc(dataframe):
        # Null count per column, expressed as a percentage of the row count.
        return 100*dataframe.isnull().sum()/len(dataframe)

    nulls = null_perc(dataframe)
    plt.figure(1, figsize=(5,20))  # Customize this if needed
    # Bars are placed by integer position; the tick labels are replaced with
    # the column names on the next line.
    ax = sns.barplot(x=nulls, y=list(range(len(nulls))), orient='h', color="blue")
    _ = plt.yticks(plt.yticks()[0], nulls.index)
    # Show the percentage axis along the top edge of the chart.
    ax.xaxis.set_ticks_position('top')
# Clone your fork over SSH (replace the placeholders with your account and repository).
git clone git@github.com:YOUR-USERNAME/YOUR-FORKED-REPO.git
cd into/cloned/fork-repo
# Register the original repository as "upstream". GitHub no longer serves the
# unauthenticated git:// protocol, so the HTTPS URL is used instead.
git remote add upstream https://github.com/ORIGINAL-DEV-USERNAME/REPO-YOU-FORKED-FROM.git
git fetch upstream
# Tracer has been deprecated since IPython 5.1; set_trace is its replacement.
from IPython.core.debugger import set_trace
# Place this call wherever you want to start debugging
set_trace()
""" | |
Some PDB debugger commands:
n(ext): execute the current line and stop at the next one
c(ontinue): keep running until the next breakpoint