Skip to content

Instantly share code, notes, and snippets.

View wassname's full-sized avatar
🙃

wassname (Michael J Clark) wassname

🙃
View GitHub Profile
@wassname
wassname / unbalance_dask_dataframe.py
Created November 29, 2019 08:03
unbalance_dask_dataframe.py
%pylab inline
import pandas as pd
import dask.dataframe as dd
def get_unbal_df(size = 100, balance=None):
"""Get a randomly unbalanced df"""
if balance is None:
balance = np.random.randint(-100, 100)
if balance<0:
@wassname
wassname / data_block_binary.py
Last active November 26, 2019 02:42
fastai binary class data block
from typing import Iterator, Collection
from fastai.data_block import CategoryListBase
from fastai.text import *
class BinaryProcessor(CategoryProcessor):
def create_classes(self, classes):
self.classes = classes
if classes is not None: self.c2i = {0:0, 1:1}
def generate_classes(self, items):
@wassname
wassname / dataset_cache_with_dask.py
Last active May 29, 2020 02:09
Cache a torch dataset to npy files using dask
"""
Cache a torch dataset to npy files using dask
url:https://gist.github.com/wassname/f38f8774b6f97977b660d20dfa0f0036
lic:MIT
author:wassname
usage:
batch_size=16
chunk_size=batch_size*4
@wassname
wassname / jaccard_pytorch.py
Created September 16, 2019 01:43
jaccard distance loss pytorch [draft]
#!/usr/bin/env python
# coding: utf-8
get_ipython().run_line_magic('pylab', 'inline')
import torch
def jaccard_distance_loss(y_true, y_pred, smooth=100):
"""
Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|)
= sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|))
@wassname
wassname / less_simple_cache.py
Created July 29, 2019 07:30
How to cache with mutable (unhashable) function inputs
def cache_load_utturances(ttl=360000):
"""
Decorator for wrapping simple cache around load_utterances.
Since some arguments are unhashable (tokenizer) or mutable (list), we need to build the cache key manually
"""
def decorate(func):
@simple_cache.wraps(func)
def wrapper(**kwargs):
@wassname
wassname / pandas_classification_report.py
Last active January 19, 2024 06:28
Scikit Learn Classification Report in a pandas Dataframe (and confusion)
"""
@url: https://gist.github.com/wassname/f3cbdc14f379ba9ec2acfafe5c1db592
"""
import pandas as pd
import sklearn.metrics
import numpy as np
def classification_report(*args, **kwargs):
"""
@wassname
wassname / running_stats.py
Last active November 14, 2023 15:09
Running stats (mean, standard deviation) for python, pytorch, etc
import numpy as np
# handle pytorch tensors etc, by using tensorboardX's method
try:
    from tensorboardX.x2num import make_np
except ImportError:
    def make_np(x):
        """Fallback converter used when tensorboardX is not installed.

        Converts ``x`` to a newly allocated numpy array of dtype float16.
        The input is never aliased: ``astype`` returns a fresh array, so
        mutating the result does not affect ``x``.

        NOTE(review): float16 has a small range (max ~65504) and roughly
        three significant decimal digits; confirm this precision is
        intended for accumulating statistics.
        """
        # np.asarray avoids an intermediate copy; astype (copy=True by
        # default) still guarantees the caller receives an independent array.
        return np.asarray(x).astype('float16')
class RunningStats(object):
@wassname
wassname / runningmean.py
Created May 31, 2019 04:05
runningmean.py
class RunningMean(object):
def __init__(self, sum=0, i=0):
self.sum = sum
self.i = i
def __add__(self, other):
return RunningMean(self.sum+other, self.i+1)
def add(self, loss):
@wassname
wassname / simple_transformer.py
Last active September 22, 2021 18:52
Transformer in ~80 lines of code from Thomas Wolf's tweet https://twitter.com/Thom_Wolf/status/1129658539142766592
"""
Transformer in ~80 lines of code.
From Thomas Wolf's tweet https://twitter.com/Thom_Wolf/status/1129658539142766592.
"""
import torch
from torch import nn
class Transformer(nn.Module):
@wassname
wassname / jaccard_distance_loss.py
Last active March 7, 2022 22:10
jaccard_distance_loss for pytorch
class JaccardDistanceLoss(torch.nn.Module):
def __init__(self, smooth=100, dim=1, size_average=True, reduce=True):
"""
Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|)
= sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|))
The Jaccard distance loss is useful for unbalanced datasets. This has been
shifted so it converges on 0 and is smoothed to avoid exploding or vanishing
gradients.