karamanbk’s gists

karamanbk / g4_fe_tenure.py

Last active May 25, 2019 11:14

	#function to order cluster numbers
	def order_cluster(cluster_field_name, target_field_name,df,ascending):
	    """Relabel clusters so their numbers follow the ordering of a target column.

	    The mean of ``target_field_name`` is computed per distinct value of
	    ``cluster_field_name``; clusters are sorted by that mean and renumbered
	    0, 1, 2, ... in sorted order.

	    Args:
	        cluster_field_name: name of the column holding the current cluster labels.
	        target_field_name: name of the column whose per-cluster mean decides the order.
	        df: input DataFrame; it is not modified.
	        ascending: passed to ``sort_values``; True gives label 0 to the cluster
	            with the smallest mean, False to the one with the largest.

	    Returns:
	        A new DataFrame with the original cluster column dropped and the
	        re-numbered cluster column (same name) appended as the last column.
	        The result comes from an inner ``pd.merge`` on the cluster column.
	    """
	    # Mean target value per cluster, sorted so that row position == new label.
	    cluster_means = df.groupby(cluster_field_name)[target_field_name].mean().reset_index()
	    cluster_means = cluster_means.sort_values(by=target_field_name, ascending=ascending).reset_index(drop=True)

	    # Temporary column for the new label. Pick a name that does not exist in
	    # df: a clash would make the merge suffix both columns (_x/_y) and the
	    # rename below would silently do nothing.
	    rank_field_name = '__order_cluster_rank__'
	    while rank_field_name in df.columns:
	        rank_field_name += '_'
	    cluster_means[rank_field_name] = cluster_means.index

	    # Attach the new label to every row, then swap it in for the old column.
	    df_final = pd.merge(df, cluster_means[[cluster_field_name, rank_field_name]], on=cluster_field_name)
	    df_final = df_final.drop([cluster_field_name], axis=1)
	    df_final = df_final.rename(columns={rank_field_name: cluster_field_name})
	    return df_final

karamanbk / g4_num_eda.py

Created May 25, 2019 10:46

	#plotting monthly charge
	# Mean of Churn for each integer MonthlyCharges value. astype(int) drops the
	# fractional part, so charges are bucketed by whole units. df_data itself is
	# left untouched because assign() works on a copy.
	df_plot = (
	    df_data.assign(MonthlyCharges=df_data['MonthlyCharges'].astype(int))
	    .groupby('MonthlyCharges')['Churn']
	    .mean()
	    .reset_index()
	)


	plot_data = [
	go.Scatter(
	x=df_plot['MonthlyCharges'],
	y=df_plot['Churn'],

karamanbk / g4_cat_eda.py

Created May 25, 2019 09:35

	#Partner
	# Mean of Churn per distinct Partner value, returned as a two-column frame
	# (Partner, Churn) ready to feed the bar chart.
	df_plot = df_data.groupby('Partner', as_index=False)['Churn'].mean()
	plot_data = [
	go.Bar(
	x=df_plot['Partner'],
	y=df_plot['Churn'],
	width = [0.5, 0.5],
	marker=dict(
	color=['green', 'blue'])
	)

karamanbk / g4_intro.py

Created May 25, 2019 09:04

	#import libraries

	from datetime import datetime, timedelta,date
	import pandas as pd
	%matplotlib inline
	from sklearn.metrics import classification_report,confusion_matrix
	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns
	from __future__ import division

karamanbk / g_series_3.ipynb

Last active July 22, 2024 07:10

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

karamanbk / g3_multi_xgb.py

Last active March 31, 2020 01:23

	#XGBoost Multiclassification Model
	# Fit a multi-class XGBoost classifier on the training split.
	ltv_xgb_model = xgb.XGBClassifier(max_depth=5, learning_rate=0.1,objective= 'multi:softprob',n_jobs=-1).fit(X_train, y_train)

	# Report mean accuracy on both splits. The test features are always selected
	# with the training columns so the model sees the same feature set and order
	# it was fitted on.
	print('Accuracy of XGB classifier on training set: {:.2f}'
	       .format(ltv_xgb_model.score(X_train, y_train)))
	print('Accuracy of XGB classifier on test set: {:.2f}'
	       .format(ltv_xgb_model.score(X_test[X_train.columns], y_test)))

	# Predict on the same column-aligned test matrix used for scoring above, so
	# the classification report and the accuracy figure describe the same inputs.
	y_pred = ltv_xgb_model.predict(X_test[X_train.columns])
	print(classification_report(y_test, y_pred))

karamanbk / g3_ltv_fe.py

Last active March 31, 2020 01:23

	#convert categorical columns to numerical
	# get_dummies turns categorical columns into indicator columns.
	tx_class = pd.get_dummies(tx_cluster)

	#calculate and show correlations
	# Correlation of every column with LTVCluster, strongest positive first.
	# The sorted Series is not stored; it is only displayed when this runs as
	# the last expression of a notebook cell.
	corr_matrix = tx_class.corr()
	corr_matrix.loc[:, 'LTVCluster'].sort_values(ascending=False)

	#create X and y, X will be feature set and y is the label - LTV
	# m6_Revenue is removed from the features together with the label itself.
	X = tx_class.drop(columns=['LTVCluster','m6_Revenue'])
	y = tx_class.loc[:, 'LTVCluster']

karamanbk / g3_ltv_clusters.py

Last active March 31, 2020 01:22

	#remove outliers
	# Keep only rows strictly below the 99th percentile of m6_Revenue.
	# .copy() makes the filtered frame independent of the original, so the
	# column assignment below does not write to a slice (which would raise
	# pandas' SettingWithCopyWarning).
	tx_merge = tx_merge[tx_merge['m6_Revenue']<tx_merge['m6_Revenue'].quantile(0.99)].copy()


	#creating 3 clusters
	# Cluster on the single m6_Revenue feature and store each row's cluster id.
	# NOTE(review): no random_state is set, so cluster ids may differ between runs.
	kmeans = KMeans(n_clusters=3)
	kmeans.fit(tx_merge[['m6_Revenue']])
	tx_merge['LTVCluster'] = kmeans.predict(tx_merge[['m6_Revenue']])

	#order cluster number based on LTV

karamanbk / g3_merge.py

Last active March 31, 2020 01:22

	# Left-join the 6-month revenue onto the user table by CustomerID; every
	# missing value in the joined frame is replaced with 0.
	tx_merge = tx_user.merge(tx_user_6m, how='left', on='CustomerID').fillna(0)

	# Subset used for plotting: rows with m6_Revenue below 30000.
	tx_graph = tx_merge[tx_merge['m6_Revenue'] < 30000]

	plot_data = [
	go.Scatter(
	x=tx_graph.query("Segment == 'Low-Value'")['OverallScore'],
	y=tx_graph.query("Segment == 'Low-Value'")['m6_Revenue'],
	mode='markers',

karamanbk / g3_ltv.py

Created May 4, 2019 09:20

	#calculate revenue and create a new dataframe for it
	# Revenue per row is UnitPrice times Quantity (written onto tx_6m in place).
	tx_6m['Revenue'] = tx_6m['UnitPrice'].mul(tx_6m['Quantity'])

	# Total revenue per customer, as a frame with columns CustomerID and m6_Revenue.
	tx_user_6m = (
	    tx_6m.groupby('CustomerID')['Revenue']
	    .sum()
	    .reset_index(name='m6_Revenue')
	)


	#plot LTV histogram
	plot_data = [
	go.Histogram(
	x=tx_user_6m.query('m6_Revenue < 10000')['m6_Revenue']