Skip to content

Instantly share code, notes, and snippets.

View lmassaron's full-sized avatar
🦉
Running experiments

Luca Massaron lmassaron

🦉
Running experiments
View GitHub Profile
@lmassaron
lmassaron / heuristic_interaction_detection
Created September 20, 2018 05:47
Extracting decision rules from Scikit-learn tree data-structures
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import BaggingRegressor
import pandas as pd
import numpy as np
# Load the Boston housing dataset through scikit-learn's bundled loader.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed version still provides it.
boston = load_boston()
# Feature matrix as a DataFrame whose columns carry the dataset's feature names.
X = pd.DataFrame(boston['data'], columns=boston['feature_names'])
# Target values taken from the dataset's 'target' entry.
y = boston['target']
def random_crop(img, random_crop_size):
    """Return a randomly positioned crop of a channels-last RGB image.

    Args:
        img: Array indexed as (height, width, channels); the channel axis
            must have length 3.
        random_crop_size: Pair ``(crop_height, crop_width)``.

    Returns:
        The slice ``img[top:top + crop_height, left:left + crop_width, :]``
        for a uniformly drawn top-left corner (basic slicing, so for a NumPy
        array this is a view, not a copy).

    Raises:
        AssertionError: If the image does not have exactly 3 channels.
        ValueError: If the requested crop is larger than the image.
    """
    # Note: image_data_format is 'channel_last'
    assert img.shape[2] == 3
    height, width = img.shape[0], img.shape[1]
    dy, dx = random_crop_size
    if dy > height or dx > width:
        # np.random.randint would also raise ValueError here, but with an
        # opaque "low >= high" message; fail early with a useful one.
        raise ValueError(
            "crop size (%d, %d) exceeds image size (%d, %d)"
            % (dy, dx, height, width)
        )
    # Draw the horizontal offset first, then the vertical one, so results
    # under a fixed NumPy seed match the original draw order.
    left = np.random.randint(0, width - dx + 1)
    top = np.random.randint(0, height - dy + 1)
    return img[top:(top + dy), left:(left + dx), :]
def from_p_to_logit(prob):
    """Convert a probability to odds, ``prob / (1 - prob)``.

    Despite the name, no logarithm is taken: the result is the odds, not
    the log-odds. It is the exact inverse of ``from_logit_to_p``.

    Args:
        prob: Probability (scalar or array-like supporting arithmetic).

    Returns:
        The odds corresponding to ``prob``.
    """
    return prob / (1 - prob)
def from_logit_to_p(logit):
    """Convert odds back to a probability, ``logit / (1 + logit)``.

    The argument is treated as plain odds (no exponential is applied), which
    makes this the exact inverse of ``from_p_to_logit``.

    Args:
        logit: Odds value (scalar or array-like supporting arithmetic).

    Returns:
        The probability corresponding to the given odds.
    """
    return logit / (1 + logit)
def recalibrate_prob(prob, old_baseline, new_baseline):
"""
Recalibrates the probability
from a logistic regression
@lmassaron
lmassaron / rle
Created January 14, 2019 12:57
RLE: rle_encode / rle_decode
def rle_encode(mask):
"""Encodes a mask in Run Length Encoding (RLE).
Returns a string of space-separated values.
"""
assert mask.ndim == 2, "Mask must be of shape [Height, Width]"
# Flatten it column wise
m = mask.T.flatten()
# Compute gradient. Equals 1 or -1 at transition points
g = np.diff(np.concatenate([[0], m, [0]]), n=1)
# 1-based indices of transition points (where gradient != 0)
@lmassaron
lmassaron / gist:9e218a3ac53700a34c26e158d6d3e412
Last active May 28, 2019 09:54
Custom metrics for Keras from Scikit-learn
# Custom metrics for Keras from Scikit-learn
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
def auroc(y_true, y_pred):
    """Keras-compatible metric wrapping scikit-learn's ``roc_auc_score``.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted scores.

    Returns:
        A ``tf.double`` tensor holding the score computed by
        ``roc_auc_score(y_true, y_pred)``.
    """
    # tf.py_func was removed in TensorFlow 2.x; tf.numpy_function takes the
    # same (func, inp, Tout) arguments. Fall back to py_func on older
    # releases that do not provide numpy_function.
    wrap_numpy_fn = getattr(tf, "numpy_function", None) or tf.py_func
    return wrap_numpy_fn(roc_auc_score, (y_true, y_pred), tf.double)
def mAP(y_true, y_pred):
    """Keras-compatible metric wrapping scikit-learn's ``average_precision_score``.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted scores.

    Returns:
        A ``tf.double`` tensor holding the score computed by
        ``average_precision_score(y_true, y_pred)``.
    """
    # tf.py_func was removed in TensorFlow 2.x; tf.numpy_function takes the
    # same (func, inp, Tout) arguments. Fall back to py_func on older
    # releases that do not provide numpy_function.
    wrap_numpy_fn = getattr(tf, "numpy_function", None) or tf.py_func
    return wrap_numpy_fn(average_precision_score, (y_true, y_pred), tf.double)
@lmassaron
lmassaron / gist:79d3a3aa6e1bd1d556f97ee437d80c1e
Last active February 18, 2019 13:24
plot_keras_history
def plot_keras_history(history, measures):
"""
history: Keras training history
measures = list of names of measures
"""
rows = len(measures) // 2 + len(measures) % 2
fig, panels = plt.subplots(rows, 2, figsize=(15, 5))
plt.subplots_adjust(top = 0.99, bottom=0.01, hspace=0.4, wspace=0.2)
try:
panels = [item for sublist in panels for item in sublist]
@lmassaron
lmassaron / gist:b01f1d217c1899318f526f52b15ba460
Created February 11, 2019 11:15
Carvana loss functions
import keras.backend as K
from keras.backend.tensorflow_backend import _to_tensor
from keras.losses import binary_crossentropy
def dice_coef_clipped(y_true, y_pred, smooth=1.0):
    """Dice coefficient on rounded masks, scaled to the 0-100 range.

    Both tensors are rounded with ``K.round`` and flattened before the
    overlap is measured, so the score is computed on the rounded values
    rather than on the raw predictions.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor.
        smooth: Additive term in numerator and denominator; keeps the ratio
            defined when both rounded masks sum to zero.

    Returns:
        ``100 * (2 * |A . B| + smooth) / (|A| + |B| + smooth)`` as a backend
        tensor, where A and B are the rounded, flattened inputs.
    """
    y_true_f = K.flatten(K.round(y_true))
    y_pred_f = K.flatten(K.round(y_pred))
    intersection = K.sum(y_true_f * y_pred_f)
    denominator = K.sum(y_true_f) + K.sum(y_pred_f) + smooth
    return 100. * (2. * intersection + smooth) / denominator
@lmassaron
lmassaron / gist:cac82df412f1ccd143d513902e04c8fb
Created February 28, 2019 08:48
Fast Computation of AUC-ROC score
import numpy as np
from numba import jit
@jit
def fast_auc(y_true, y_prob):
y_true = np.asarray(y_true)
y_true = y_true[np.argsort(y_prob)]
nfalse = 0
auc = 0
n = len(y_true)
https://machinelearningmastery.com/attention-long-short-term-memory-recurrent-neural-networks/
https://stackoverflow.com/questions/42918446/how-to-add-an-attention-mechanism-in-keras
# --- Attention is all you need --- #
# Builds attention weights over `layer`, which must expose a static
# three-dimensional shape; only the size of its last axis is kept.
# NOTE(review): presumably `layer` is a (batch, timesteps, units) sequence
# output -- confirm against the surrounding model code.
_,_,units = layer.shape.as_list()
# Single-unit tanh Dense layer: presumably one score per timestep.
attention = Dense(1, activation='tanh')(layer)
# Collapse the scores to one vector per sample.
attention = Flatten()(attention)
# Softmax turns the scores into weights (presumably summing to 1 per sample).
attention = Activation('softmax')(attention)
# Repeat the weight vector `units` times...
attention = RepeatVector(units)(attention)
# ...and swap the two non-batch axes, presumably so the weights line up with
# the shape of `layer` for a later element-wise product (not shown here).
attention = Permute([2, 1])(attention)
# In statistical theory, Chauvenet's criterion (named for William Chauvenet) is a means of assessing whether
# one piece of experimental data — an outlier — from a set of observations is likely to be spurious.
# https://en.wikipedia.org/wiki/Chauvenet%27s_criterion
def chauvenet(array):
mean = array.mean() # Mean of incoming array
stdv = array.std() # Standard deviation
N = len(array) # Lenght of incoming array
criterion = 1.0/(2*N) # Chauvenet's criterion
d = abs(array-mean)/stdv # Distance of a value to mean in stdv's