karamanbk’s gists

karamanbk / g3_multi_xgb.py

Last active March 31, 2020 01:23

	#XGBoost Multiclassification Model
	#Fit an XGBClassifier with the 'multi:softprob' objective on the training split.
	ltv_xgb_model = xgb.XGBClassifier(
		max_depth=5,
		learning_rate=0.1,
		objective='multi:softprob',
		n_jobs=-1,
	).fit(X_train, y_train)

	#Select the test features by the training columns once, so that the accuracy
	#score and the classification report below are computed on the same feature
	#set and column order (previously only the score used this selection).
	X_test_aligned = X_test[X_train.columns]

	print('Accuracy of XGB classifier on training set: {:.2f}'
		.format(ltv_xgb_model.score(X_train, y_train)))
	print('Accuracy of XGB classifier on test set: {:.2f}'
		.format(ltv_xgb_model.score(X_test_aligned, y_test)))

	#Per-class report for the test-set predictions
	y_pred = ltv_xgb_model.predict(X_test_aligned)
	print(classification_report(y_test, y_pred))

karamanbk / g_series_3.ipynb

Last active July 22, 2024 07:10

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

karamanbk / g4_intro.py

Created May 25, 2019 09:04

	#import libraries

	from datetime import datetime, timedelta,date
	import pandas as pd
	%matplotlib inline
	from sklearn.metrics import classification_report,confusion_matrix
	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns
	from __future__ import division

karamanbk / g4_cat_eda.py

Created May 25, 2019 09:35

	#Partner
	#Mean of the Churn column for each distinct Partner value, as a two-column frame
	churn_by_partner = df_data.groupby('Partner')['Churn'].mean()
	df_plot = churn_by_partner.reset_index()
	plot_data = [
	go.Bar(
	x=df_plot['Partner'],
	y=df_plot['Churn'],
	width = [0.5, 0.5],
	marker=dict(
	color=['green', 'blue'])
	)

karamanbk / g4_num_eda.py

Created May 25, 2019 10:46

	#plotting monthly charge
	#Work on a copy whose MonthlyCharges column is cast to int (df_data itself is
	#left unchanged), then take the mean of Churn for each integer charge value.
	df_plot = df_data.assign(MonthlyCharges=df_data['MonthlyCharges'].astype(int))
	df_plot = df_plot.groupby('MonthlyCharges')['Churn'].mean().reset_index()


	plot_data = [
	go.Scatter(
	x=df_plot['MonthlyCharges'],
	y=df_plot['Churn'],

karamanbk / g4_fe_tenure.py

Last active May 25, 2019 11:14

	#function to order cluster numbers
	def order_cluster(cluster_field_name, target_field_name,df,ascending):
		"""Relabel clusters so that labels follow the order of their mean target value.

		The mean of ``target_field_name`` is computed per value of
		``cluster_field_name``; the clusters are sorted by that mean and each one
		is given its position in the sorted order (0, 1, 2, ...) as its new label.

		Args:
			cluster_field_name: Name of the column in ``df`` holding the current
				cluster labels.
			target_field_name: Name of the column in ``df`` whose per-cluster mean
				determines the ordering.
			df: Input DataFrame; it is not modified.
			ascending: Passed to ``sort_values``; ``True`` gives label 0 to the
				cluster with the smallest mean, ``False`` to the largest.

		Returns:
			A new DataFrame built by merging ``df`` with the relabelling table
			(default inner merge). The relabelled ``cluster_field_name`` column
			is the last column of the result.
		"""
		# Mean target per cluster, sorted so that the row position is the new label.
		cluster_means = df.groupby(cluster_field_name)[target_field_name].mean().reset_index()
		cluster_means = cluster_means.sort_values(by=target_field_name,ascending=ascending).reset_index(drop=True)

		# Temporary column for the new labels. Pick a name that is not already a
		# column of df: a fixed name such as 'index' would clash in the merge when
		# df has a column of that name, and the final rename would then find nothing.
		rank_column = 'index'
		while rank_column in df.columns:
			rank_column = '_' + rank_column
		cluster_means[rank_column] = cluster_means.index

		# Attach the new label to every row, then swap it in for the old label column.
		merged = pd.merge(df,cluster_means[[cluster_field_name,rank_column]], on=cluster_field_name)
		merged = merged.drop([cluster_field_name],axis=1)
		return merged.rename(columns={rank_column:cluster_field_name})

karamanbk / g4_logistic_regression.py

Created May 25, 2019 11:49

	#preparing column names for the model
	#Spaces, parentheses and hyphens in each column name are replaced by underscores.
	underscore_table = str.maketrans(" ()-", "____")
	all_columns = [name.translate(underscore_table) for name in df_data.columns]

	df_data.columns = all_columns

	#first term of the column list that is assembled for the model
	glm_columns = 'gender'

karamanbk / g_series_4.ipynb

Created May 25, 2019 12:51

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

karamanbk / g5_intro.py

Created June 2, 2019 06:51

	#import libraries
	from datetime import datetime, timedelta,date
	import pandas as pd
	%matplotlib inline
	from sklearn.metrics import classification_report,confusion_matrix
	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns
	from __future__ import division
	from sklearn.cluster import KMeans

karamanbk / g5_adding_label.py

Created June 2, 2019 08:09

	#earliest InvoiceDate per CustomerID in tx_next, stored as MinPurchaseDate
	tx_next_first_purchase = (
		tx_next.groupby('CustomerID')['InvoiceDate']
		.min()
		.reset_index(name='MinPurchaseDate')
	)

	#latest InvoiceDate per CustomerID in tx_6m, stored as MaxPurchaseDate
	tx_last_purchase = (
		tx_6m.groupby('CustomerID')['InvoiceDate']
		.max()
		.reset_index(name='MaxPurchaseDate')
	)

	#left merge keeps every customer of tx_6m; MinPurchaseDate is missing for
	#customers that have no row in tx_next
	tx_purchase_dates = tx_last_purchase.merge(tx_next_first_purchase, on='CustomerID', how='left')