def catboost_target_encoder(train, test, cols_encode, target):
    """Ordered (CatBoost-style) target encoding: each training row is
    encoded using only the target values of the rows that precede it."""
    train_new = train.copy()
    test_new = test.copy()
    for column in cols_encode:
        global_mean = train[target].mean()
        # running target sum within each category, excluding the current row
        cumulative_sum = train.groupby(column)[target].cumsum() - train[target]
        cumulative_count = train.groupby(column).cumcount()
        train_new[column + "_cat_mean_target"] = (cumulative_sum / cumulative_count).fillna(global_mean)
        # test rows get the category mean computed on the full training set
        test_new[column + "_cat_mean_target"] = (
            test[column].map(train.groupby(column)[target].mean()).fillna(global_mean))
    return train_new, test_new
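A minimal usage sketch (the toy frame, the "city" column and the "label" target are made up for illustration, not part of the gist). The first occurrence in each category falls back to the global mean; later rows see only the targets that came before them:

import pandas as pd

train = pd.DataFrame({"city": ["a", "a", "b", "b", "b"],
                      "label": [1, 0, 1, 1, 0]})
test = pd.DataFrame({"city": ["a", "b", "c"]})
train_enc, test_enc = catboost_target_encoder(train, test, ["city"], "label")
# first row of each city falls back to the global mean (0.6)
print(train_enc["city_cat_mean_target"].tolist())  # [0.6, 1.0, 0.6, 1.0, 1.0]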
from sklearn.model_selection import KFold

def target_encoder_kfold(train_data, test_data, cols_encode, target, folds=10):
    """
    Mean regularized target encoding based on kfold: each training row is
    encoded with category means computed on the other folds only.
    """
    kf = KFold(n_splits=folds, shuffle=True, random_state=1)
    for col in cols_encode:
        global_mean = train_data[target].mean()
        for train_index, test_index in kf.split(train_data):
            # category means from the in-fold rows, applied to the held-out rows
            fold_means = train_data.iloc[train_index].groupby(col)[target].mean()
            train_data.loc[train_data.index[test_index], col + "_mean_target"] = (
                train_data.iloc[test_index][col].map(fold_means))
        train_data[col + "_mean_target"] = train_data[col + "_mean_target"].fillna(global_mean)
        # test rows use category means from the full training set
        test_data[col + "_mean_target"] = (
            test_data[col].map(train_data.groupby(col)[target].mean()).fillna(global_mean))
    return train_data, test_data
@groverpr
groverpr / simulating_friedman
Created September 11, 2018 22:28
simulating friedman data for comparison studies
from sklearn.datasets import make_friedman1
from sklearn.model_selection import train_test_split

# simulating 10,000 data points with 7 uniformly distributed features,
# of which only the first 5 drive the target and the remaining 2 are useless
X, y = make_friedman1(n_samples=10000, n_features=7, noise=0.0, random_state=11)

# train-validation split
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.20, random_state=42)
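For reference, the Friedman #1 target that make_friedman1 generates can be recomputed by hand; with noise=0.0 the check below should print True (a sanity sketch, not part of the gist):

import numpy as np

y_check = (10 * np.sin(np.pi * X[:, 0] * X[:, 1])
           + 20 * (X[:, 2] - 0.5) ** 2
           + 10 * X[:, 3]
           + 5 * X[:, 4])
print(np.allclose(y, y_check))  # True: features 5 and 6 never enter the formula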
@groverpr
groverpr / lightgbm_objective
Last active September 11, 2023 02:48
How to use objective and evaluation in lightgbm
import lightgbm

# ********* Sklearn API **********
# default lightgbm model with sklearn api
gbm = lightgbm.LGBMRegressor()

# updating objective function to custom (default is "regression")
# also adding metrics to check different scores
# ("mse" and "mae" are lightgbm aliases for the l2 and l1 metrics)
gbm.set_params(**{"objective": custom_asymmetric_train}, metrics=["mse", "mae"])
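To actually see the custom objective and evaluation in action, the model can be fit with an eval set; a sketch assuming X_train/X_valid from the simulating_friedman gist above and custom_asymmetric_valid from the custom_loss gist below:

gbm.fit(
    X_train, y_train,
    eval_set=[(X_valid, y_valid)],
    eval_metric=custom_asymmetric_valid,
)
y_pred = gbm.predict(X_valid)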
@groverpr
groverpr / custom_loss
Last active September 26, 2018 18:14
How to write custom objective and custom eval metric in lightgbm
import numpy as np

def custom_asymmetric_train(y_true, y_pred):
    # loss = 10*residual^2 when residual < 0 (over-prediction), residual^2 otherwise;
    # grad and hess are the first and second derivatives of that loss w.r.t. y_pred
    residual = (y_true - y_pred).astype("float")
    grad = np.where(residual < 0, -2 * 10.0 * residual, -2 * residual)
    hess = np.where(residual < 0, 2 * 10.0, 2.0)
    return grad, hess

def custom_asymmetric_valid(y_true, y_pred):
    residual = (y_true - y_pred).astype("float")
    loss = np.where(residual < 0, (residual**2) * 10.0, residual**2)
    # (name, value, is_higher_better) as lightgbm expects from an eval metric
    return "custom_asymmetric_eval", np.mean(loss), False
# huber loss
def huber(true, pred, delta):
    loss = np.where(np.abs(true - pred) < delta,
                    0.5 * ((true - pred)**2),
                    delta * np.abs(true - pred) - 0.5 * (delta**2))
    return np.sum(loss)

# log cosh loss
def logcosh(true, pred):
    loss = np.log(np.cosh(pred - true))
    return np.sum(loss)

# true: array of true target values
# pred: array of predictions
def mse(true, pred):
    return np.sum((true - pred)**2)

def mae(true, pred):
    return np.sum(np.abs(true - pred))
# also available in sklearn (as means rather than sums)
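The sklearn counterparts (mean_squared_error, mean_absolute_error) return the mean rather than the sum, so they differ from the functions above by a factor of len(true); a quick illustration with made-up arrays:

from sklearn.metrics import mean_squared_error, mean_absolute_error

true = np.array([1.0, 2.0, 3.0])
pred = np.array([1.1, 1.9, 3.2])
print(mse(true, pred), mean_squared_error(true, pred) * len(true))   # both ~0.06
print(mae(true, pred), mean_absolute_error(true, pred) * len(true))  # both ~0.4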
@groverpr
groverpr / dataloader cf
Created December 29, 2017 08:16
dataloader for columnar data cf
x = ratings.drop(['rating'], axis=1)
y = ratings['rating'].astype(np.float32)

# args: path, validation indices, features, target, categorical columns, batch size
data = ColumnarModelData.from_data_frame(path, val_indx, x, y, ['userId', 'movieId'], 64)

# n_users: count of unique users (671), n_movies: count of unique movies (9066)
model = EmbeddingNet(n_users, n_movies)

# model.parameters() hands the weights to the optimizer for back-propagation
# lr = 1e-3, weight decay = 1e-5, using the Adam optimizer
opt = optim.Adam(model.parameters(), 1e-3, weight_decay=1e-5)

# fitting the model for 3 epochs with MSE loss
fit(model, data, 3, opt, F.mse_loss)
@groverpr
groverpr / neuralnet
Created December 28, 2017 08:35
cf nn
# nh = dimension of hidden linear layer
# p1 = dropout1
# p2 = dropout2
class EmbeddingNet(nn.Module):
    def __init__(self, n_users, n_movies, nh=10, p1=0.05, p2=0.5):
        super().__init__()
        # embedding matrices for users and movies, plus per-user and per-movie biases
        (self.u, self.m, self.ub, self.mb) = [get_emb(*o) for o in [
            (n_users, n_factors), (n_movies, n_factors),
            (n_users, 1), (n_movies, 1)]]
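The snippet depends on a get_emb helper and a global n_factors that are defined elsewhere in the source notebook; a plausible sketch of both (assumed, not taken from the gist):

import torch.nn as nn

n_factors = 50  # assumed embedding width; set elsewhere in the original notebook

def get_emb(ni, nf):
    # embedding layer of shape (ni, nf) with a small uniform init
    e = nn.Embedding(ni, nf)
    e.weight.data.uniform_(-0.01, 0.01)
    return e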