# Impute zero for missing values in the target feature:
train_df['totals.transactionRevenue'].fillna(0, inplace=True)
# Cast the mobile-device flag to a proper boolean dtype in both frames.
train_df['device.isMobile'] = train_df['device.isMobile'].astype(bool)
test_df['device.isMobile'] = test_df['device.isMobile'].astype(bool)
numeric_feat = ['visitNumber',
                'visitStartTime',
                'totals.hits',
                'totals.pageviews',
                'totals.timeOnSite',
                'totals.transactions',
                'totals.transactionRevenue']

# Impute zero for missing values in all numeric features.
for col in numeric_feat:
    train_df[col].fillna(0, inplace=True)
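# Note (assumption, not in the original gist): the totals.* columns in this
# dataset are parsed from JSON fields and often arrive as strings, so a dtype
# coercion is usually needed before modelling. A minimal sketch:
import pandas as pd

for col in numeric_feat:
    # Coerce to numeric; unparseable values become NaN and should then be
    # zero-filled as in the loop above.
    train_df[col] = pd.to_numeric(train_df[col], errors='coerce')
    test_df[col] = pd.to_numeric(test_df[col], errors='coerce')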
# Source: https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html
categorical_feat = ['channelGrouping',
                    'device.browser',
                    'device.operatingSystem',
                    'device.deviceCategory',
                    'geoNetwork.continent',
                    'geoNetwork.subContinent',
                    'geoNetwork.country',
                    'geoNetwork.region']
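# A minimal sketch of label-encoding these columns (assumption: the gist does
# not show its encoding loop). Fitting each encoder on the union of train and
# test values avoids errors on categories that appear only in the test set.
import pandas as pd
from sklearn.preprocessing import LabelEncoder

for col in categorical_feat:
    le = LabelEncoder()
    combined = pd.concat([train_df[col], test_df[col]], axis=0).astype(str)
    le.fit(combined)
    train_df[col] = le.transform(train_df[col].astype(str))
    test_df[col] = le.transform(test_df[col].astype(str))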
from datetime import timedelta

def get_time_series_features(data, k):
    # Training window k: the 168-day span starting 168*(k-1) days after the earliest
    # date. For k=1 with min(data['date']) = Aug 1st 2016, this keeps Aug 1st 2016
    # through Jan 15th 2017 (dates strictly before min(data['date']) + 168 days).
    train_frame_k = data.loc[(data['date'] >= min(data['date']) + timedelta(days=168 * (k - 1)))
                             & (data['date'] < min(data['date']) + timedelta(days=168 * k))]
    # Test window k: 46 to 108 days after the last training date.
    test_frame_k = data.loc[(data['date'] >= max(train_frame_k['date']) + timedelta(days=46))
                            & (data['date'] <= max(train_frame_k['date']) + timedelta(days=108))]
    # ... (feature engineering elided in the gist; assumed to return the engineered frame) ...
    return train_frame_k
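# Quick check of the k=1 window arithmetic described in the comments above
# (dates illustrative):
import pandas as pd
from datetime import timedelta

start = pd.Timestamp('2016-08-01')
print(start + timedelta(days=168))  # 2017-01-16, the first excluded date
print(start + timedelta(days=167))  # 2017-01-15, the last date kept in train_frame_1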
# Stack train and test so the time-series windows can span both periods.
train_test_data = pd.concat([train_df, test_df], axis=0).reset_index()
# Build and cache each of the three 168-day training windows.
%time train_frame_1 = get_time_series_features(train_test_data, 1)
train_frame_1.to_pickle('train_frame_1')
%time train_frame_2 = get_time_series_features(train_test_data, 2)
train_frame_2.to_pickle('train_frame_2')
%time train_frame_3 = get_time_series_features(train_test_data, 3)
train_frame_3.to_pickle('train_frame_3')
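# Usage note: the cached windows can be reloaded later without recomputing.
import pandas as pd

train_frame_1 = pd.read_pickle('train_frame_1')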
gridParams = {
    'learning_rate': [0.005, 0.01, 0.015],  # learning rate
    'n_estimators': [40, 100, 200],         # number of boosting iterations
    'num_leaves': [6, 8, 12, 15, 16],       # number of leaves in the full tree
    'boosting_type': ['gbdt'],
    'objective': ['binary'],                # binary classifier: will the customer return during the test window?
    'metric': ['binary_logloss'],           # performance metric
    'colsample_bytree': [0.6, 0.8, 1],      # fraction of features LightGBM samples before training each tree
    'subsample': [0.7, 0.9, 1],             # randomly select part of the data without resampling
    'reg_alpha': [0, 1],                    # L1 regularization
}
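# A minimal sketch of searching this grid (assumption: the gist does not show
# the search call; lgb.LGBMClassifier with 3-fold GridSearchCV is assumed, and
# X_train / y_train stand for the features and the binary "returns during the
# test window" label built from the frames above).
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(lgb.LGBMClassifier(), gridParams,
                    scoring='neg_log_loss', cv=3, verbose=1)
grid.fit(X_train, y_train)
print(grid.best_params_)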
# Run the LightGBM model for 10 iterations and average the predictions.
# Source: https://www.kaggle.com/kostoglot/winning-solution
pr_lgb_sum = 0  # accumulator for the summed predictions
print('Training and predictions')
for i in range(10):  # the average of the 10 runs is taken as the final value
    print('Iteration number ', i)
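# The loop body is elided in the gist; a sketch of what each iteration
# plausibly does (params, X_train, y_train, X_test are assumed names). The
# seed is varied per run so the averaged predictions differ.
import lightgbm as lgb

for i in range(10):
    print('Iteration number ', i)
    model = lgb.LGBMRegressor(**params, random_state=i)
    model.fit(X_train, y_train)
    pr_lgb_sum += model.predict(X_test)
pr_lgb = pr_lgb_sum / 10  # average of the 10 runs as the final prediction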
gridParams = {
    "n_estimators": [200, 400, 600, 800, 1000],
    "max_depth": [2, 5, 7, 9, 10],
    "min_samples_split": [2, 3, 5, 7],
    "min_samples_leaf": [1, 2, 4],
}