This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# function to order cluster numbers
def order_cluster(cluster_field_name, target_field_name, df, ascending):
    """Relabel clusters so their numbers follow the mean of a target column.

    The rows of ``df`` are grouped by ``cluster_field_name``, the mean of
    ``target_field_name`` is computed for every cluster, and the clusters are
    ranked by that mean. Each row's cluster label is then replaced by the
    rank (0, 1, 2, ...) of the cluster it belongs to.

    Args:
        cluster_field_name: Name of the column holding the cluster labels.
        target_field_name: Name of the column whose per-cluster mean decides
            the ordering.
        df: Input DataFrame. It is not modified.
        ascending: Forwarded to ``sort_values``. ``True`` gives rank 0 to the
            cluster with the smallest mean, ``False`` to the largest.

    Returns:
        A new DataFrame in which the cluster column holds the ranks. The
        column ends up in the last position (it is dropped and re-added),
        and the row index is the one produced by ``pd.merge``.
    """
    # Temporary column that carries the rank through the merge. It used to be
    # the fixed name 'index', which broke when ``df`` already had a column of
    # that name (merge suffixes made the final rename a no-op). Prefix
    # underscores until the name is guaranteed to be free.
    rank_column = 'index'
    while rank_column in df.columns:
        rank_column = '_' + rank_column

    # One row per cluster with the mean of the target column.
    cluster_means = (
        df.groupby(cluster_field_name)[target_field_name]
        .mean()
        .reset_index()
    )

    # After sorting, the fresh 0..k-1 index *is* the rank of each cluster.
    ranked = cluster_means.sort_values(
        by=target_field_name, ascending=ascending
    ).reset_index(drop=True)
    ranked[rank_column] = ranked.index

    # Attach the rank to every row, then swap it in for the old label.
    df_final = pd.merge(
        df, ranked[[cluster_field_name, rank_column]], on=cluster_field_name
    )
    df_final = df_final.drop([cluster_field_name], axis=1)
    return df_final.rename(columns={rank_column: cluster_field_name})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# plotting monthly charge
# Work on a copy so the integer cast below does not alter df_data.
df_plot = df_data.copy()
# Cast MonthlyCharges to int so rows are bucketed per whole-number charge.
# NOTE(review): assumes the column is numeric without missing values - confirm.
df_plot['MonthlyCharges'] = df_plot['MonthlyCharges'].astype(int)
# Mean of Churn within each integer MonthlyCharges bucket, one row per bucket.
df_plot = df_plot.groupby('MonthlyCharges')['Churn'].mean().reset_index()
plot_data = [ | |
go.Scatter( | |
x=df_plot['MonthlyCharges'], | |
y=df_plot['Churn'], |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Partner
# Mean of Churn for each distinct Partner value, one row per value.
df_plot = df_data.groupby('Partner')['Churn'].mean().reset_index()
plot_data = [ | |
go.Bar( | |
x=df_plot['Partner'], | |
y=df_plot['Churn'], | |
width = [0.5, 0.5], | |
marker=dict( | |
color=['green', 'blue']) | |
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import libraries | |
from datetime import datetime, timedelta,date | |
import pandas as pd | |
%matplotlib inline | |
from sklearn.metrics import classification_report,confusion_matrix | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn as sns | |
from __future__ import division |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# XGBoost Multiclassification Model
# Fit on the training split; the fitted estimator is kept in ltv_xgb_model.
ltv_xgb_model = xgb.XGBClassifier(
    max_depth=5,
    learning_rate=0.1,
    objective='multi:softprob',
    n_jobs=-1,
).fit(X_train, y_train)

# Select the test features in the training column order once, and use the
# same frame for both scoring and prediction. Previously only score() was
# aligned, so the report below could be based on differently ordered columns.
X_test_aligned = X_test[X_train.columns]

print('Accuracy of XGB classifier on training set: {:.2f}'
      .format(ltv_xgb_model.score(X_train, y_train)))
print('Accuracy of XGB classifier on test set: {:.2f}'
      .format(ltv_xgb_model.score(X_test_aligned, y_test)))

# Per-class precision / recall / F1 on the test split.
y_pred = ltv_xgb_model.predict(X_test_aligned)
print(classification_report(y_test, y_pred))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# convert categorical columns to numerical
tx_class = pd.get_dummies(tx_cluster)

# calculate and show correlations
corr_matrix = tx_class.corr()
# A bare expression is only displayed when it is the last line of a notebook
# cell; print it so the ranking against LTVCluster is shown wherever it runs.
print(corr_matrix['LTVCluster'].sort_values(ascending=False))

# create X and y, X will be feature set and y is the label - LTV
# m6_Revenue is removed from the features together with the label itself.
# NOTE(review): presumably because LTVCluster is derived from it - confirm.
X = tx_class.drop(['LTVCluster', 'm6_Revenue'], axis=1)
y = tx_class['LTVCluster']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# remove outliers
# Keep only rows whose m6_Revenue is strictly below its 0.99 quantile.
# .copy() makes the filtered frame independent, so adding the 'LTVCluster'
# column below does not write into a slice of the previous frame
# (which raises pandas' SettingWithCopyWarning).
revenue_cap = tx_merge['m6_Revenue'].quantile(0.99)
tx_merge = tx_merge[tx_merge['m6_Revenue'] < revenue_cap].copy()

# creating 3 clusters
# Cluster on m6_Revenue alone; the double brackets keep it a one-column frame.
kmeans = KMeans(n_clusters=3)
kmeans.fit(tx_merge[['m6_Revenue']])
tx_merge['LTVCluster'] = kmeans.predict(tx_merge[['m6_Revenue']])
#order cluster number based on LTV |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Left join keeps every row of tx_user; customers without a match in
# tx_user_6m get NaN in the joined columns.
tx_merge = pd.merge(tx_user, tx_user_6m, on='CustomerID', how='left')
# Replace every NaN in the merged frame (all columns) with 0.
tx_merge = tx_merge.fillna(0)

# Subset used for plotting: rows with m6_Revenue strictly below 30000.
tx_graph = tx_merge.query("m6_Revenue < 30000")
plot_data = [ | |
go.Scatter( | |
x=tx_graph.query("Segment == 'Low-Value'")['OverallScore'], | |
y=tx_graph.query("Segment == 'Low-Value'")['m6_Revenue'], | |
mode='markers', |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# calculate revenue and create a new dataframe for it
# Revenue per row = UnitPrice * Quantity (added to tx_6m in place).
tx_6m['Revenue'] = tx_6m['UnitPrice'] * tx_6m['Quantity']

# Total revenue per customer. The summed column is renamed explicitly rather
# than by positional assignment to .columns, so the result does not depend
# on column order.
tx_user_6m = (
    tx_6m.groupby('CustomerID')['Revenue']
    .sum()
    .reset_index()
    .rename(columns={'Revenue': 'm6_Revenue'})
)
#plot LTV histogram | |
plot_data = [ | |
go.Histogram( | |
x=tx_user_6m.query('m6_Revenue < 10000')['m6_Revenue'] |