karamanbk’s gists

karamanbk / g2_recency_elbow.py

Last active May 3, 2019 19:52

	from sklearn.cluster import KMeans

	# Elbow method: fit KMeans on Recency for k = 1..9 and record each fit's
	# inertia_ in sse, keyed by k, then plot inertia against k.
	sse = {}
	# Work on an explicit copy so that adding the "clusters" column below does
	# not write into a slice of tx_user (avoids SettingWithCopyWarning).
	tx_recency = tx_user[['Recency']].copy()
	for k in range(1, 10):
	    # Fit on the Recency column only: once "clusters" has been added to
	    # tx_recency it must not be fed back in as a feature for the next k.
	    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(tx_recency[['Recency']])
	    # Keep the labels of the most recent fit alongside the data.
	    tx_recency["clusters"] = kmeans.labels_
	    sse[k] = kmeans.inertia_
	# One point per k; the "elbow" of this curve suggests the cluster count.
	plt.figure()
	plt.plot(list(sse.keys()), list(sse.values()))

karamanbk / g2_calc_recency.py

Created May 3, 2019 19:27

	# Generic per-customer dataframe that keeps CustomerID and, later, the new segmentation scores
	tx_user = pd.DataFrame({'CustomerID': tx_data['CustomerID'].unique()})

	# Latest purchase date of each customer, as a two-column dataframe
	tx_max_purchase = (
	    tx_uk.groupby('CustomerID')['InvoiceDate']
	    .max()
	    .reset_index(name='MaxPurchaseDate')
	)

	# Recency = whole days between the latest MaxPurchaseDate in the data
	# (our observation point) and the customer's own last purchase
	tx_max_purchase['Recency'] = (
	    tx_max_purchase['MaxPurchaseDate'].max() - tx_max_purchase['MaxPurchaseDate']
	).dt.days

karamanbk / g2_g1_recap.py

Created May 3, 2019 19:25

	# import libraries
	from datetime import datetime, timedelta
	import pandas as pd
	%matplotlib inline
	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns
	from __future__ import division

	import plotly.plotly as py

karamanbk / g1_jpn.ipynb

Last active September 10, 2024 02:58

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

karamanbk / g1_jpn.ipynb

Created May 3, 2019 17:15

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

karamanbk / g1_cohort_retention.py

Last active June 3, 2020 05:35

	# Rebuild the retention table with crosstab() and attach each customer's first purchase year-month
	tx_retention = pd.crosstab(
	    index=tx_user_purchase['CustomerID'],
	    columns=tx_user_purchase['InvoiceYearMonth'],
	).reset_index()
	tx_retention = tx_retention.merge(
	    tx_min_purchase[['CustomerID', 'MinPurchaseYearMonth']],
	    on='CustomerID',
	)
	# Prefix every column except the last with 'm_' (this includes CustomerID);
	# the last, merged-in column keeps the name MinPurchaseYearMonth
	new_column_names = ['m_' + str(column) for column in tx_retention.columns[:-1]] + ['MinPurchaseYearMonth']
	tx_retention.columns = new_column_names

	#create the array of Retained users for each cohort monthly
	# NOTE(review): the loop body is not visible in this excerpt, and `months`
	# is defined outside this snippet - confirm both against the full gist.
	retention_array = []
	for i in range(len(months)):

karamanbk / g1_m_retention.py

Created May 3, 2019 15:57

	# Identify active users per month: total Revenue for every (CustomerID, InvoiceYearMonth) pair
	tx_user_purchase = (
	    tx_uk.groupby(['CustomerID', 'InvoiceYearMonth'])['Revenue']
	    .sum()
	    .reset_index()
	)

	# Retention matrix via crosstab: one row per customer, one column per InvoiceYearMonth
	tx_retention = pd.crosstab(
	    index=tx_user_purchase['CustomerID'],
	    columns=tx_user_purchase['InvoiceYearMonth'],
	).reset_index()

	tx_retention.head()

	# Month columns for the Retained & Total User counts; the slice skips the
	# first two columns (CustomerID and the first month)
	months = tx_retention.columns[2:]

karamanbk / g1_new_user_ratio.py

Created May 3, 2019 15:02

	# New user ratio per month: distinct 'New' customers divided by distinct 'Existing' customers.
	# Months missing from either side come out as NaN after the division and are
	# dropped (presumably the first month, which has no existing users - verify).
	tx_user_ratio = (
	    tx_uk.query("UserType == 'New'").groupby(['InvoiceYearMonth'])['CustomerID'].nunique()
	    / tx_uk.query("UserType == 'Existing'").groupby(['InvoiceYearMonth'])['CustomerID'].nunique()
	).reset_index().dropna()

	# Print the dataframe
	tx_user_ratio

	#plot the result

karamanbk / g1_new_user.py

Last active May 3, 2019 14:52

	# Dataframe containing CustomerID and the date of that customer's first purchase
	tx_min_purchase = (
	    tx_uk.groupby('CustomerID')['InvoiceDate']
	    .min()
	    .reset_index(name='MinPurchaseDate')
	)
	# Encode the first purchase month as the integer 100*year + month (YYYYMM)
	tx_min_purchase['MinPurchaseYearMonth'] = tx_min_purchase['MinPurchaseDate'].map(
	    lambda date: date.year * 100 + date.month
	)

	# Merge the first-purchase columns into our main dataframe (tx_uk)
	tx_uk = tx_uk.merge(tx_min_purchase, on='CustomerID')

	tx_uk.head()

karamanbk / g1_m_avg_rev.py

Created May 3, 2019 13:10

	# Average revenue per InvoiceYearMonth: the mean of Revenue over each month's rows
	tx_monthly_order_avg = (
	    tx_uk.groupby('InvoiceYearMonth')['Revenue']
	    .mean()
	    .reset_index()
	)

	# Print the dataframe
	tx_monthly_order_avg

	#plot the bar chart
	plot_data = [
	go.Bar(
	x=tx_monthly_order_avg['InvoiceYearMonth'],