This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rebuild the retention table with crosstab(): one row per CustomerID and one
# column per InvoiceYearMonth, then attach each customer's first purchase
# year-month.
tx_retention = pd.crosstab(
    tx_user_purchase['CustomerID'],
    tx_user_purchase['InvoiceYearMonth'],
).reset_index()
tx_retention = tx_retention.merge(
    tx_min_purchase[['CustomerID', 'MinPurchaseYearMonth']],
    on='CustomerID',
)
# Prefix every column except the last one with "m_" (this includes the
# CustomerID column, which becomes "m_CustomerID"); the last column keeps the
# name MinPurchaseYearMonth.
new_column_names = ['m_' + str(column) for column in tx_retention.columns[:-1]] + ['MinPurchaseYearMonth']
tx_retention.columns = new_column_names
#create the array of Retained users for each cohort monthly | |
retention_array = [] | |
for i in range(len(months)): |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import libraries | |
from datetime import datetime, timedelta | |
import pandas as pd | |
%matplotlib inline | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn as sns | |
from __future__ import division | |
import plotly.plotly as py |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generic per-customer dataframe: holds CustomerID now and the segmentation
# scores added later.
tx_user = pd.DataFrame(tx_data['CustomerID'].unique(), columns=['CustomerID'])
# Latest InvoiceDate for each customer, as its own dataframe.
tx_max_purchase = (
    tx_uk.groupby('CustomerID')['InvoiceDate']
    .max()
    .reset_index(name='MaxPurchaseDate')
)
# The observation point is the most recent purchase date in this table;
# Recency is the number of whole days between it and each customer's last
# purchase.
observation_point = tx_max_purchase['MaxPurchaseDate'].max()
tx_max_purchase['Recency'] = (observation_point - tx_max_purchase['MaxPurchaseDate']).dt.days
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.cluster import KMeans | |
# Elbow method: fit k-means on Recency for k = 1..9 and record the inertia
# (sum of squared distances to the closest centroid) for each k.
sse = {}
# Explicit copy so that adding the "clusters" column below writes to an
# independent frame rather than to something derived from tx_user.
tx_recency = tx_user[['Recency']].copy()
for k in range(1, 10):
    # Fit on the Recency column only. tx_recency gains a "clusters" column
    # during the first pass; fitting on the whole frame would feed the labels
    # of the previous k back in as a second feature and distort the inertia.
    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(tx_recency[['Recency']])
    tx_recency["clusters"] = kmeans.labels_
    sse[k] = kmeans.inertia_
# Plot inertia against the number of clusters.
plt.figure()
plt.plot(list(sse.keys()), list(sse.values()))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a 4-cluster k-means model on Recency, then store each customer's
# predicted cluster label in a new RecencyCluster column.
kmeans = KMeans(n_clusters=4).fit(tx_user[['Recency']])
tx_user['RecencyCluster'] = kmeans.predict(tx_user[['Recency']])
#function for ordering cluster numbers | |
def order_cluster(cluster_field_name, target_field_name,df,ascending): | |
new_cluster_field_name = 'new_' + cluster_field_name | |
df_new = df.groupby(cluster_field_name)[target_field_name].mean().reset_index() | |
df_new = df_new.sort_values(by=target_field_name,ascending=ascending).reset_index(drop=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Frequency per customer: the count of non-null InvoiceDate values in tx_uk
# for each CustomerID.
# NOTE(review): if tx_uk holds one row per invoice line, this counts line
# items rather than distinct orders -- confirm against the data.
tx_frequency = (
    tx_uk.groupby('CustomerID')['InvoiceDate']
    .count()
    .reset_index(name='Frequency')
)
# Attach the counts to the main per-customer dataframe.
tx_user = tx_user.merge(tx_frequency, on='CustomerID')
#plot the histogram | |
plot_data = [ | |
go.Histogram( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a 4-cluster k-means model on Frequency and label every customer.
kmeans = KMeans(n_clusters=4).fit(tx_user[['Frequency']])
tx_user['FrequencyCluster'] = kmeans.predict(tx_user[['Frequency']])
# Reorder the cluster numbers with order_cluster; the final argument is its
# `ascending` flag.
# NOTE(review): presumably this renumbers clusters by mean Frequency, lowest
# first -- confirm against the full order_cluster definition.
tx_user = order_cluster('FrequencyCluster', 'Frequency', tx_user, True)
# Summary statistics of Frequency within each cluster.
tx_user.groupby('FrequencyCluster')['Frequency'].describe()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Revenue of each tx_uk row is UnitPrice times Quantity; summing it per
# CustomerID gives the revenue for each customer.
tx_uk['Revenue'] = tx_uk['UnitPrice'].mul(tx_uk['Quantity'])
tx_revenue = tx_uk.groupby('CustomerID')['Revenue'].sum().reset_index()
# Attach the per-customer revenue to the main dataframe.
tx_user = tx_user.merge(tx_revenue, on='CustomerID')
#plot the histogram | |
plot_data = [ | |
go.Histogram( |