This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import libraries | |
from datetime import datetime, timedelta,date | |
import pandas as pd | |
%matplotlib inline | |
from sklearn.metrics import classification_report,confusion_matrix | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn as sns | |
from __future__ import division | |
from sklearn.cluster import KMeans |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Revenue vs Frequency | |
tx_graph = tx_user.query("Revenue < 50000 and Frequency < 2000") | |
plot_data = [ | |
go.Scatter( | |
x=tx_graph.query("Segment == 'Low-Value'")['Frequency'], | |
y=tx_graph.query("Segment == 'Low-Value'")['Revenue'], | |
mode='markers', | |
name='Low', | |
marker= dict(size= 7, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Start every customer in the lowest tier, then promote by OverallScore.
# Thresholds are applied in increasing order, so a score above 4 ends up
# 'High-Value' even though it also passed the 'Mid-Value' cut.
tx_user['Segment'] = 'Low-Value'
for threshold, label in ((2, 'Mid-Value'), (4, 'High-Value')):
    tx_user.loc[tx_user['OverallScore'] > threshold, 'Segment'] = label
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Overall score = sum of the three per-metric cluster labels.
tx_user['OverallScore'] = (
    tx_user['RecencyCluster']
    + tx_user['FrequencyCluster']
    + tx_user['RevenueCluster']
)

# Mean Recency / Frequency / Revenue for each score value.
# The columns are selected with a list: selecting several columns from a
# groupby with a bare tuple (['a', 'b', 'c'] without the inner brackets) was
# deprecated in pandas 1.0 and raises an error in pandas 2.0.
tx_user.groupby('OverallScore')[['Recency', 'Frequency', 'Revenue']].mean()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a 4-cluster k-means model on the single Revenue column and label
# every customer with the cluster it is assigned to.
revenue_features = tx_user[['Revenue']]
kmeans = KMeans(n_clusters=4).fit(revenue_features)
tx_user['RevenueCluster'] = kmeans.predict(revenue_features)

# Reorder the cluster numbers via order_cluster (called with ascending=True).
tx_user = order_cluster('RevenueCluster', 'Revenue', tx_user, True)
#show details of the dataframe |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Line-level revenue is unit price times quantity.
tx_uk['Revenue'] = tx_uk['UnitPrice'] * tx_uk['Quantity']

# Total revenue per customer, with CustomerID restored as a regular column.
revenue_per_customer = tx_uk.groupby('CustomerID')['Revenue'].sum()
tx_revenue = revenue_per_customer.reset_index()

# Attach the totals to the main per-customer dataframe
# (merge defaults to an inner join on CustomerID).
tx_user = tx_user.merge(tx_revenue, on='CustomerID')
#plot the histogram | |
plot_data = [ | |
go.Histogram( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a 4-cluster k-means model on the single Frequency column and label
# every customer with the cluster it is assigned to.
frequency_features = tx_user[['Frequency']]
kmeans = KMeans(n_clusters=4).fit(frequency_features)
tx_user['FrequencyCluster'] = kmeans.predict(frequency_features)

# Reorder the cluster numbers via order_cluster (called with ascending=True).
tx_user = order_cluster('FrequencyCluster', 'Frequency', tx_user, True)

# Summary statistics of Frequency within each cluster.
tx_user.groupby('FrequencyCluster').Frequency.describe()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Frequency = number of non-null InvoiceDate rows recorded per customer.
# reset_index(name=...) yields the two columns CustomerID and Frequency.
tx_frequency = (
    tx_uk.groupby('CustomerID')['InvoiceDate']
    .count()
    .reset_index(name='Frequency')
)

# Attach the counts to the main per-customer dataframe
# (merge defaults to an inner join on CustomerID).
tx_user = tx_user.merge(tx_frequency, on='CustomerID')
#plot the histogram | |
plot_data = [ | |
go.Histogram( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit a 4-cluster k-means model on the single Recency column and store each
# customer's cluster label in the dataframe.
recency_features = tx_user[['Recency']]
kmeans = KMeans(n_clusters=4).fit(recency_features)
tx_user['RecencyCluster'] = kmeans.predict(recency_features)
#function for ordering cluster numbers | |
def order_cluster(cluster_field_name, target_field_name,df,ascending): | |
new_cluster_field_name = 'new_' + cluster_field_name | |
df_new = df.groupby(cluster_field_name)[target_field_name].mean().reset_index() | |
df_new = df_new.sort_values(by=target_field_name,ascending=ascending).reset_index(drop=True) |