Prince Grover groverpr

Senior Applied Scientist at Amazon

groverpr / catboost_target_encoder

Created June 30, 2019 22:25

	def catboost_target_encoder(train, test, cols_encode, target):
	train_new = train.copy()
	test_new = test.copy()
	for column in cols_encode:
	global_mean = train[target].mean()
	cumulative_sum = train.groupby(column)[target].cumsum() - train[target]
	cumulative_count = train.groupby(column).cumcount()
	train_new[column + "_cat_mean_target"] = cumulative_sum/cumulative_count
	train_new[column + "_cat_mean_target"].fillna(global_mean, inplace=True)

groverpr / reg_target_encoding

Last active June 30, 2019 22:19

	from sklearn.model_selection import KFold

	def target_encoder_kfold(train_data, test_data, cols_encode, target, folds=10):
	"""
	Mean regularized target encoding based on kfold
	"""
	kf = KFold(n_splits=folds, random_state=1)
	for col in cols_encode:
	global_mean = train_data[target].mean()
	for train_index, test_index in kf.split(train_data):

groverpr / simulating_friedman

Created September 11, 2018 22:28

simulating friedman data for comparison studies

	# Draw 10,000 noise-free samples from the Friedman #1 benchmark with 7 features.
	# Per scikit-learn's make_friedman1, only the first 5 features enter the target;
	# the remaining 2 are independent of y (the "useless" ones).
	X, y = make_friedman1(
	    n_samples=10000,
	    n_features=7,
	    noise=0.0,
	    random_state=11,
	)

	# Hold out 20% of the rows for validation; the seed is fixed so the split is reproducible.
	X_train, X_valid, y_train, y_valid = train_test_split(
	    X,
	    y,
	    test_size=0.20,
	    random_state=42,
	)

groverpr / lightgbm_objective

Last active September 11, 2023 02:48

How to use objective and evaluation in lightgbm

	import lightgbm

	# ******* Sklearn API ********
	# (The banner above was a bare line of asterisks, which is a syntax error in
	# Python; it is kept as a comment so the snippet can actually run.)

	# Default LightGBM regressor built through the scikit-learn style wrapper.
	gbm = lightgbm.LGBMRegressor()

	# Replace the default objective ("regression") with the custom callable and
	# request additional metrics so several scores are reported.
	gbm.set_params(**{'objective': custom_asymmetric_train}, metrics=["mse", 'mae'])

groverpr / custom_loss

Last active September 26, 2018 18:14

How to write custom objective and custom eval metric in lightgbm

	def custom_asymmetric_train(y_true, y_pred):
	    """Return gradient and Hessian of an asymmetric squared-error loss.

	    The loss is ``10 * residual**2`` where ``residual = y_true - y_pred`` is
	    negative (the prediction is above the target) and ``residual**2``
	    otherwise. Derivatives are taken with respect to ``y_pred``.

	    Args:
	        y_true: Array of target values (must support ``.astype`` after
	            subtraction, e.g. a numpy array).
	        y_pred: Array of predictions, same shape as ``y_true``.

	    Returns:
	        Tuple ``(grad, hess)`` of arrays with one entry per sample.
	    """
	    residual = (y_true - y_pred).astype("float")
	    # d/dy_pred of c * residual**2 is -2 * c * residual, with c = 10 on the
	    # over-prediction side and c = 1 elsewhere.
	    grad = np.where(residual < 0, -2 * 10.0 * residual, -2 * residual)
	    # The second derivative is the constant 2 * c on each side.
	    hess = np.where(residual < 0, 2 * 10.0, 2.0)
	    return grad, hess

	def custom_asymmetric_valid(y_true, y_pred):
	    """Evaluate the asymmetric squared-error loss as a custom metric.

	    Squared residuals are multiplied by 10 where ``y_true - y_pred`` is
	    negative (the prediction is above the target) and left unscaled
	    otherwise; the per-sample losses are then averaged.

	    Args:
	        y_true: Array of target values.
	        y_pred: Array of predictions, same shape as ``y_true``.

	    Returns:
	        Tuple ``(metric_name, metric_value, flag)``. The trailing ``False``
	        is presumably LightGBM's ``is_higher_better`` flag (lower loss is
	        better) -- confirm against the LightGBM custom-metric docs.
	    """
	    residual = (y_true - y_pred).astype("float")
	    loss = np.where(residual < 0, (residual ** 2) * 10.0, residual ** 2)
	    return "custom_asymmetric_eval", np.mean(loss), False

groverpr / loss_regression

Created May 14, 2018 02:47

	# huber loss
	def huber(true, pred, delta):
	    """Return the summed Huber loss between ``true`` and ``pred``.

	    Errors with absolute value below ``delta`` are penalised quadratically
	    (``0.5 * err**2``); larger errors are penalised linearly
	    (``delta * |err| - 0.5 * delta**2``), so the two pieces meet at
	    ``|err| == delta``.

	    Args:
	        true: Array of true target values.
	        pred: Array of predictions, same shape as ``true``.
	        delta: Threshold at which the loss switches from quadratic to linear.

	    Returns:
	        The sum of the per-sample losses (not the mean).
	    """
	    error = true - pred
	    abs_error = np.abs(error)
	    loss = np.where(
	        abs_error < delta,
	        0.5 * (error ** 2),
	        delta * abs_error - 0.5 * (delta ** 2),
	    )
	    return np.sum(loss)

	# log cosh loss
	def logcosh(true, pred):
	    """Return the summed log-cosh loss between ``true`` and ``pred``.

	    Computes ``sum(log(cosh(pred - true)))`` in a numerically stable way.

	    Args:
	        true: Array of true target values.
	        pred: Array of predictions, same shape as ``true``.

	    Returns:
	        The sum of the per-sample losses (not the mean).
	    """
	    diff = pred - true
	    # log(cosh(x)) == log(e**x + e**-x) - log(2). np.logaddexp evaluates the
	    # first term without forming cosh(x) itself, which overflows to inf for
	    # |x| above roughly 710.
	    loss = np.logaddexp(diff, -diff) - np.log(2.0)
	    return np.sum(loss)

groverpr / mas

Created May 13, 2018 02:31

	# true: Array of true target variable
	# pred: Array of predictions

	def mse(true, pred):
	    """Return the total squared error between ``true`` and ``pred``.

	    Note: despite the name, the result is the sum of squared errors; it is
	    not divided by the number of samples.
	    """
	    errors = true - pred
	    return np.sum(np.square(errors))

	def mae(true, pred):
	    """Return the total absolute error between ``true`` and ``pred``.

	    Note: despite the name, the result is the sum of absolute errors; it is
	    not divided by the number of samples.
	    """
	    errors = true - pred
	    return np.sum(np.absolute(errors))

	# also available in sklearn

groverpr / dataloader cf

Created December 29, 2017 08:16

dataloader for columnar data cf

	# Features: every column of `ratings` except the target column.
	x = ratings.drop(['rating'], axis=1)
	# Target: the rating itself, cast to 32-bit floats.
	y = ratings['rating'].astype(np.float32)
	# NOTE(review): the list presumably names the categorical columns and 64 is
	# presumably the batch size -- confirm against ColumnarModelData.from_data_frame.
	data = ColumnarModelData.from_data_frame(path, val_indx, x, y, ['userId', 'movieId'], 64)

groverpr / gist:4f7528a52ac20a8db641c8fdc1e71eb5

Created December 29, 2017 08:15

fitting model cf

	# Build the network from the number of distinct users (671) and distinct
	# movies (9066).
	model = EmbeddingNet(n_users, n_movies)

	# Adam optimizer over the model's parameters (these are what back-propagation
	# updates), with learning rate 1e-3 and weight decay 1e-5.
	opt = optim.Adam(
	    model.parameters(),
	    lr=1e-3,
	    weight_decay=1e-5,
	)

	# Train with mean-squared-error loss.
	# NOTE(review): the literal 3 is presumably the number of epochs -- confirm
	# against the signature of fit().
	fit(model, data, 3, opt, F.mse_loss)

groverpr / neuralnet

Created December 28, 2017 08:35

cf nn

	# nh = dimension of hidden linear layer
	# p1 = dropout1
	# p2 = dropout2

	class EmbeddingNet(nn.Module):
	def __init__(self, n_users, _n_movies, nh = 10, p1 = 0.05, p2= 0.5):
	super().__init__()
	(self.u, self.m, self.ub, self.mb) = [get_emb(*o) for o in [
	(n_users, n_factors), (n_movies, n_factors),
	(n_users,1), (n_movies,1)