This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cluster customers into 4 groups using the Revenue column only.
revenue_features = tx_user[['Revenue']]
kmeans = KMeans(n_clusters=4).fit(revenue_features)
tx_user['RevenueCluster'] = kmeans.predict(revenue_features)

# Renumber the clusters with order_cluster (helper defined elsewhere).
tx_user = order_cluster('RevenueCluster', 'Revenue', tx_user, True)
#show details of the dataframe |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Overall score is the sum of the Recency, Frequency and Revenue cluster
# numbers; the grouped mean() shows what each score level looks like.
tx_user['OverallScore'] = (
    tx_user['RecencyCluster']
    + tx_user['FrequencyCluster']
    + tx_user['RevenueCluster']
)
# The columns are selected with a list (double brackets). Passing several
# column names as a bare tuple key to a GroupBy is rejected by pandas 2.0+.
tx_user.groupby('OverallScore')[['Recency', 'Frequency', 'Revenue']].mean()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Label each customer by OverallScore: above 4 is High-Value, above 2 (and
# not above 4) is Mid-Value, everything else stays Low-Value.
overall_score = tx_user['OverallScore']
is_high_value = overall_score > 4
is_mid_value = (overall_score > 2) & ~is_high_value

tx_user['Segment'] = 'Low-Value'
tx_user.loc[is_mid_value, 'Segment'] = 'Mid-Value'
tx_user.loc[is_high_value, 'Segment'] = 'High-Value'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Revenue vs Frequency: keep only the rows under both plotting limits.
under_plot_limits = (tx_user['Revenue'] < 50000) & (tx_user['Frequency'] < 2000)
tx_graph = tx_user[under_plot_limits]
plot_data = [ | |
go.Scatter( | |
x=tx_graph.query("Segment == 'Low-Value'")['Frequency'], | |
y=tx_graph.query("Segment == 'Low-Value'")['Revenue'], | |
mode='markers', | |
name='Low', | |
marker= dict(size= 7, |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import libraries | |
from datetime import datetime, timedelta,date | |
import pandas as pd | |
%matplotlib inline | |
from sklearn.metrics import classification_report,confusion_matrix | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn as sns | |
from __future__ import division | |
from sklearn.cluster import KMeans |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Revenue per transaction line is unit price times quantity.
tx_6m['Revenue'] = tx_6m['UnitPrice'] * tx_6m['Quantity']

# Total revenue per customer, exposed as the m6_Revenue column.
tx_user_6m = (
    tx_6m.groupby('CustomerID')['Revenue']
    .sum()
    .reset_index()
    .rename(columns={'Revenue': 'm6_Revenue'})
)
#plot LTV histogram | |
plot_data = [ | |
go.Histogram( | |
x=tx_user_6m.query('m6_Revenue < 10000')['m6_Revenue'] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Left-join the per-customer 6-month revenue onto the user table, then
# replace every missing value in the merged frame with 0.
tx_merge = tx_user.merge(tx_user_6m, how='left', on='CustomerID').fillna(0)

# Restrict the plotted rows to m6_Revenue below 30000.
tx_graph = tx_merge[tx_merge['m6_Revenue'] < 30000]
plot_data = [ | |
go.Scatter( | |
x=tx_graph.query("Segment == 'Low-Value'")['OverallScore'], | |
y=tx_graph.query("Segment == 'Low-Value'")['m6_Revenue'], | |
mode='markers', |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Drop outliers: keep rows strictly below the 99th percentile of m6_Revenue.
m6_revenue_cutoff = tx_merge['m6_Revenue'].quantile(0.99)
tx_merge = tx_merge[tx_merge['m6_Revenue'] < m6_revenue_cutoff]

# Split the remaining customers into 3 clusters on m6_Revenue alone.
ltv_features = tx_merge[['m6_Revenue']]
kmeans = KMeans(n_clusters=3).fit(ltv_features)
tx_merge['LTVCluster'] = kmeans.predict(ltv_features)
#order cluster number based on LTV |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-hot encode the categorical columns of tx_cluster.
tx_class = pd.get_dummies(tx_cluster)

# Correlation of every column with LTVCluster, strongest positive first.
corr_matrix = tx_class.corr()
corr_matrix['LTVCluster'].sort_values(ascending=False)

# y is the label (LTVCluster); X is every remaining column except
# m6_Revenue, which is dropped together with the label.
y = tx_class['LTVCluster']
X = tx_class.drop(columns=['LTVCluster', 'm6_Revenue'])