Skip to content

Instantly share code, notes, and snippets.

View lmassaron's full-sized avatar
🦉
Running experiments

Luca Massaron lmassaron

🦉
Running experiments
View GitHub Profile
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
class ClassifierTransformer(BaseEstimator, TransformerMixin):
"""
Classifier's estimates of a regression problem using out-of-fold (OOF) predictions
"""
def __init__(self, estimator=None, n_classes=2, cv=3):
self.estimator = estimator
self.n_classes = n_classes
@lmassaron
lmassaron / gist:ee6f926e2fa3eb1fe204e47e1ae60c88
Last active September 5, 2021 07:04
Reduce memory usage of a pandas DataFrame
# Derived from the original script https://www.kaggle.com/gemartin/load-data-reduce-memory-usage
# by Guillaume Martin
def reduce_mem_usage(df, verbose=True):
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
start_mem = df.memory_usage().sum() / 1024**2
for col in df.columns:
col_type = df[col].dtypes
if col_type in numerics:
c_min = df[col].min()
@lmassaron
lmassaron / gist:0bce501423823ea857b9cd2375a93ccc
Created August 30, 2019 06:27
Creating a class for your model's hyper-parameters
class AllMyFields:
    """Container whose attributes are the entries of a mapping.

    Handy for hyper-parameters: ``AllMyFields({'alpha': 1}).alpha`` is ``1``.
    """

    def __init__(self, dictionary):
        """Set one instance attribute per key/value pair of *dictionary*.

        Each key is used as the attribute name and the associated value as
        the attribute value.
        """
        for attr_name, attr_value in dictionary.items():
            setattr(self, attr_name, attr_value)
# Usage example: every dictionary key becomes an attribute of the instance.
o = AllMyFields({'alpha': 1, 'beta': 2})
# Attribute names are exactly the dictionary keys ('alpha', 'beta'); any other
# name raises AttributeError.
o.alpha
@lmassaron
lmassaron / ResNeXt_gan.py
Created August 8, 2019 15:01 — forked from mjdietzx/ResNeXt_gan.py
Keras/tensorflow implementation of GAN architecture where generator and discriminator networks are ResNeXt.
from keras import layers
from keras import models
import tensorflow as tf
#
# generator input params
#
rand_dim = (1, 1, 2048) # dimension of the generator's input tensor (gaussian noise)
@lmassaron
lmassaron / target_encode
Last active September 13, 2024 05:16
Preprocessing scheme for high-cardinality categorical attributes
def add_noise(series, noise_level):
    """Return *series* with multiplicative Gaussian noise applied.

    One standard-normal draw is taken per element (``len(series)`` draws via
    ``np.random.randn``); each element is multiplied by
    ``1 + noise_level * draw``. With ``noise_level == 0`` the factor is 1.
    """
    gaussian_draws = np.random.randn(len(series))
    scaling = 1 + noise_level * gaussian_draws
    return series * scaling
def target_encode(trn_series=None, tst_series=None, target=None, k=1, f=1, noise_level=0):
"""
Encoding is computed like in the following paper by:
Micci-Barreca, Daniele. "A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems." ACM SIGKDD Explorations Newsletter 3.1 (2001): 27-32.
trn_series (pd.Series) : categorical feature in-sample
https://uk.mathworks.com/help/vision/ug/faster-r-cnn-basics.html
https://medium.com/@jonathan_hui/map-mean-average-precision-for-object-detection-45c121a31173
https://medium.com/@14prakash/the-intuition-behind-retinanet-eb636755607d
https://cv-tricks.com/object-detection/faster-r-cnn-yolo-ssd/
https://towardsdatascience.com/retinanet-how-focal-loss-fixes-single-shot-detection-cb320e3bb0de
https://medium.com/data-from-the-trenches/object-detection-with-deep-learning-on-aerial-imagery-2465078db8a9
https://medium.com/deep-learning-journals/fast-scnn-explained-and-implemented-using-tensorflow-2-0-6bd17c17a49e
https://github.com/Dharun/Tensorflow-License-Plate-Detection/blob/master/numplate_recognition_detection.py
https://xgboost.readthedocs.io/en/latest/tutorials/model.html
https://towardsdatascience.com/entropy-how-decision-trees-make-decisions-2946b9c18c8
https://github.com/Microsoft/LightGBM/issues/2062#issuecomment-477120125
https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision-tree.pdf
https://explained.ai/gradient-boosting/
https://www.youtube.com/watch?v=5CWwwtEM2TA
# In statistical theory, Chauvenet's criterion (named for William Chauvenet) is a means of assessing whether
# one piece of experimental data — an outlier — from a set of observations is likely to be spurious.
# https://en.wikipedia.org/wiki/Chauvenet%27s_criterion
def chauvenet(array):
mean = array.mean() # Mean of incoming array
stdv = array.std() # Standard deviation
N = len(array) # Length of incoming array
criterion = 1.0/(2*N) # Chauvenet's criterion
d = abs(array-mean)/stdv # Distance of a value to mean in stdv's
https://machinelearningmastery.com/attention-long-short-term-memory-recurrent-neural-networks/
https://stackoverflow.com/questions/42918446/how-to-add-an-attention-mechanism-in-keras
# --- Attention is all you need --- #
_,_,units = layer.shape.as_list()
attention = Dense(1, activation='tanh')(layer)
attention = Flatten()(attention)
attention = Activation('softmax')(attention)
attention = RepeatVector(units)(attention)
attention = Permute([2, 1])(attention)
@lmassaron
lmassaron / gist:cac82df412f1ccd143d513902e04c8fb
Created February 28, 2019 08:48
Fast Computation of AUC-ROC score
import numpy as np
from numba import jit
@jit
def fast_auc(y_true, y_prob):
y_true = np.asarray(y_true)
y_true = y_true[np.argsort(y_prob)]
nfalse = 0
auc = 0
n = len(y_true)