Skip to content

Instantly share code, notes, and snippets.

@karamanbk
Last active May 25, 2019 11:14
Show Gist options
  • Save karamanbk/c2d29911cdc36fc57fdb865a5f5403d6 to your computer and use it in GitHub Desktop.
Save karamanbk/c2d29911cdc36fc57fdb865a5f5403d6 to your computer and use it in GitHub Desktop.
#function to order cluster numbers
def order_cluster(cluster_field_name, target_field_name,df,ascending):
    """Renumber cluster labels by the mean of a target column.

    Every distinct value of ``df[cluster_field_name]`` is replaced by its
    zero-based rank among the clusters when they are sorted by the mean of
    ``df[target_field_name]``.

    Args:
        cluster_field_name: Name of the column holding the current labels.
        target_field_name: Name of the column whose per-cluster mean
            decides the ordering.
        df: Input DataFrame. It is not modified.
        ascending: If True, the cluster with the smallest mean becomes 0;
            if False, the cluster with the largest mean becomes 0.

    Returns:
        A new DataFrame with a fresh 0..n-1 index, rows in their original
        order, and the cluster column moved to the last position holding
        the new integer labels. Rows whose cluster label is missing are
        dropped, because groupby assigns them no cluster mean.
    """
    cluster_means = df.groupby(cluster_field_name)[target_field_name].mean()
    ordered_labels = cluster_means.sort_values(ascending=ascending).index
    rank_by_label = {label: rank for rank, label in enumerate(ordered_labels)}

    # Map labels to ranks directly instead of merging on a temporary
    # 'index' column: a merge breaks when df already has an 'index' column
    # and may reorder rows depending on the pandas version.
    ranks = df[cluster_field_name].map(rank_by_label)
    has_rank = ranks.notna()

    result = df[has_rank].drop([cluster_field_name], axis=1).reset_index(drop=True)
    # .values bypasses index alignment; result was just re-indexed to 0..n-1.
    result[cluster_field_name] = ranks[has_rank].astype("int64").values
    return result
#Elbow Method to identify the appropriate number of clusters
# sse maps each candidate cluster count k to the fitted model's inertia_
# (within-cluster sum of squared distances, per scikit-learn).
sse = {}
# Explicit copy: the label column added below must not be written into a
# slice of df_data.
df_cluster = df_data[['tenure']].copy()
for k in range(1, 10):
    # Fit on the tenure column only. Fitting on all of df_cluster would use
    # the previous iteration's "clusters" labels as a second feature from
    # k=2 onward and distort the inertia curve.
    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(df_cluster[['tenure']])
    df_cluster["clusters"] = kmeans.labels_
    sse[k] = kmeans.inertia_
plt.figure()
plt.plot(list(sse.keys()), list(sse.values()))
plt.xlabel("Number of cluster")
plt.show()
#K-means logic to the selected column and change the naming
# Fit a 3-cluster model on tenure, then label every row with its cluster.
tenure_features = df_data[['tenure']]
kmeans = KMeans(n_clusters=3).fit(tenure_features)
df_data['TenureCluster'] = kmeans.predict(tenure_features)

#order cluster numbers
# ascending=True: cluster numbers increase with the cluster's mean tenure.
df_data = order_cluster(
    cluster_field_name='TenureCluster',
    target_field_name='tenure',
    df=df_data,
    ascending=True,
)

#observe the characteristics
# The result is not assigned, so it is only useful as interactive output.
df_data.groupby('TenureCluster')['tenure'].describe()

#change the naming
cluster_names = {0: 'Low', 1: 'Mid', 2: 'High'}
df_data['TenureCluster'] = df_data['TenureCluster'].replace(cluster_names)
#plot Churn Rate vs new clusters
# Mean of the Churn column per tenure cluster, one row per cluster.
df_plot = df_data.groupby('TenureCluster')['Churn'].mean().reset_index()

bar_widths = [0.5, 0.5, 0.5, 0.5]
bar_colors = ['green', 'blue', 'orange', 'red']
churn_bars = go.Bar(
    x=df_plot['TenureCluster'],
    y=df_plot['Churn'],
    width=bar_widths,
    marker={"color": bar_colors},
)
plot_data = [churn_bars]

# categoryarray lists the x-axis categories in Low, Mid, High order.
plot_layout = go.Layout(
    xaxis=dict(type="category", categoryarray=['Low', 'Mid', 'High']),
    title='Tenure Cluster vs Churn Rate',
    plot_bgcolor="rgb(243,243,243)",
    paper_bgcolor="rgb(243,243,243)",
)

fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment