This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| %pylab inline | |
| import pandas as pd | |
| import dask.dataframe as dd | |
| def get_unbal_df(size = 100, balance=None): | |
| """Get a randomly unbalanced df""" | |
| if balance is None: | |
| balance = np.random.randint(-100, 100) | |
| if balance<0: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import Iterator, Collection | |
| from fastai.data_block import CategoryListBase | |
| from fastai.text import * | |
| class BinaryProcessor(CategoryProcessor): | |
| def create_classes(self, classes): | |
| self.classes = classes | |
| if classes is not None: self.c2i = {0:0, 1:1} | |
| def generate_classes(self, items): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Cache a torch dataset to npy files using dask | |
| url:https://gist.github.com/wassname/f38f8774b6f97977b660d20dfa0f0036 | |
| lic:MIT | |
| author:wassname | |
| usage: | |
| batch_size=16 | |
| chunk_size=batch_size*4 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| get_ipython().run_line_magic('pylab', 'inline') | |
| import torch | |
| def jaccard_distance_loss(y_true, y_pred, smooth=100): | |
| """ | |
| Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|) | |
| = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|)) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def cache_load_utturances(ttl=360000): | |
| """ | |
| Decorator for wrapping simple cache around load_utterances. | |
| Since some arguments are unhashable (e.g. the tokenizer) or mutable (e.g. a list), we need to build the cache key manually. | |
| """ | |
| def decorate(func): | |
| @simple_cache.wraps(func) | |
| def wrapper(**kwargs): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| @url: https://gist.github.com/wassname/f3cbdc14f379ba9ec2acfafe5c1db592 | |
| """ | |
| import pandas as pd | |
| import sklearn.metrics | |
| import numpy as np | |
| def classification_report(*args, **kwargs): | |
| """ | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| # Convert PyTorch tensors and other array-likes to NumPy using tensorboardX's make_np; if tensorboardX is not installed, fall back to a plain NumPy conversion. | |
| try: | |
| from tensorboardX.x2num import make_np | |
| except ImportError: | |
| def make_np(x): | |
| return np.array(x).copy().astype('float16') | |
| class RunningStats(object): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| class RunningMean(object): | |
| def __init__(self, sum=0, i=0): | |
| self.sum = sum | |
| self.i = i | |
| def __add__(self, other): | |
| return RunningMean(self.sum+other, self.i+1) | |
| def add(self, loss): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Transformer in ~80 lines of code. | |
| From Thomas Wolf's tweet https://twitter.com/Thom_Wolf/status/1129658539142766592. | |
| """ | |
| import torch | |
| from torch import nn | |
| class Transformer(nn.Module): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| class JaccardDistanceLoss(torch.nn.Module): | |
| def __init__(self, smooth=100, dim=1, size_average=True, reduce=True): | |
| """ | |
| Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|) | |
| = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|)) | |
| The Jaccard distance loss is useful for unbalanced datasets. This has been | |
| shifted so it converges on 0 and is smoothed to avoid exploding or vanishing | |
| gradients. |