Last active
June 19, 2019 15:52
-
-
Save AloyASen/5f391b233beec301418f774484a500b8 to your computer and use it in GitHub Desktop.
IBM Watson Marketing customer Value data set used for customer analytics division at Radii Corporation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[ ]: | |
# this is a simulated test run on a pruned customer data set; customer analytics is a branch of modeling the e-commerce business
# this is the analytics for the dataset at https://www.kaggle.com/pankajjsh06/ibm-watson-marketing-customer-value-data | |
# In[1]: | |
# Enable inline matplotlib rendering only when an IPython/Jupyter shell is
# actually running. ``get_ipython`` is injected by IPython and is undefined
# when this exported notebook is executed as a plain Python script, so the
# lookup is guarded instead of letting the script die with a NameError.
try:
    ipython_shell = get_ipython()
except NameError:
    ipython_shell = None
if ipython_shell is not None:
    ipython_shell.run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt | |
import pandas as pd | |
# In[ ]: | |
# the minimal requirements have been added to the repository, I guess! let's see what is in store
# In[2]: | |
from pathlib import Path | |
# Load the marketing customer CSV from the local ``data`` directory.
root = Path('data')
df = pd.read_csv(root.joinpath('marketingCustomer.csv'))
# In[8]:
# the dataset is now held in memory as a pandas DataFrame
# next, check how large it is (rows, columns)
# In[9]:
df.shape
# In[10]:
# peek at the first five rows
df.head(5)
# In[ ]: | |
# now get the analytics on the engaged customers
# understand how different customers behave and react to different marketing strategies
# --- starting with the overall engagement rates
# In[11]: | |
# Non-null value counts of every column, split by the customer's response.
df.groupby('Response').count()
# In[13]:
# keep only the per-response customer counts from the table above
df.groupby('Response')['Customer'].count()
# In[ ]:
# visualize these counts in a bar plot
# In[16]:
ax = df.groupby('Response')['Customer'].count().plot.bar(
    color='orchid',
    grid=True,
    figsize=(10, 7),
    title='Marketing Engagement',
)
# In[19]:
# share of engaged and non-engaged customers among all rows
df.groupby('Response')['Customer'].count() / len(df)
# In[23]: | |
#chapter 2 | |
# engagement rates by offer types presented to the customer | |
# In[25]: | |
# Number of customers that responded 'Yes', per renewal offer type.
byOfferTypeDF = df[df['Response'] == 'Yes'].groupby('Renew Offer Type')['Customer'].count()
# In[28]:
# Engagement rate = responders / all customers that received the offer type.
erateByOfferType = byOfferTypeDF / df.groupby('Renew Offer Type')['Customer'].count()
# In[30]:
# Plot the rate as a percentage.
ax = (erateByOfferType * 100).plot.bar(
    figsize=(7, 7),
    color='dodgerblue',
    grid=True,
)
ax.set_ylabel('Engagement Rate %')
plt.show()
# In[ ]: | |
# chapter 3 | |
# classification by offer type | |
# how customers with different attributes respond to different marketing messages | |
# In[10]: | |
# Customers who responded 'Yes', counted per (offer type, vehicle class),
# divided by the number of customers counted per offer type.
respondedByOfferAndClass = df[df['Response'] == 'Yes'].groupby(
    ['Renew Offer Type', 'Vehicle Class']
)['Customer'].count()
customersPerOffer = df.groupby('Renew Offer Type')['Customer'].count()
byOfferTypeDFrame = respondedByOfferAndClass / customersPerOffer
# In[11]:
byOfferTypeDFrame
# In[ ]:
# make the previous output easier to read: unstack pivots the inner group
# level (vehicle class) into columns, and missing combinations are set to 0
# In[12]:
byOfferTypeDFrame = byOfferTypeDFrame.unstack()
byOfferTypeDFrame = byOfferTypeDFrame.fillna(0)
# show the pivoted table
byOfferTypeDFrame
# In[13]:
ax = (byOfferTypeDFrame * 100).plot.bar(figsize=(10, 7), grid=True)
# In[ ]: | |
# engagement rates differ by different sales channels | |
# In[14]: | |
# Engagement rate per sales channel: responders / all customers of the channel.
respondedPerChannel = df[df['Response'] == 'Yes'].groupby('Sales Channel')['Customer'].count()
customersPerChannel = df.groupby('Sales Channel')['Customer'].count()
bySalesChannelDFrame = respondedPerChannel / customersPerChannel
bySalesChannelDFrame
# In[15]:
# Plot the rate as a percentage.
ax = (bySalesChannelDFrame * 100).plot.bar(
    figsize=(7, 7),
    color='palegreen',
    grid=True,
)
ax.set_ylabel('Engagement rate %')
plt.show()
# In[ ]: | |
# we can see that agents work better in terms of getting responses from customers
# let's break the results down further using more customer attributes
# In[17]: | |
# Responders counted per (sales channel, vehicle size), divided by the number
# of customers counted per sales channel.
respondedByChannelAndSize = df[df['Response'] == 'Yes'].groupby(
    ['Sales Channel', 'Vehicle Size']
)['Customer'].count()
customersPerChannel = df.groupby('Sales Channel')['Customer'].count()
bySalesChannelDFrame = respondedByChannelAndSize / customersPerChannel
# pivot the vehicle sizes into columns for a more readable table
bySalesChannelDFrame = bySalesChannelDFrame.unstack()
bySalesChannelDFrame = bySalesChannelDFrame.fillna(0)
bySalesChannelDFrame
# In[18]:
ax = (bySalesChannelDFrame * 100).plot.bar(figsize=(10, 7), grid=True)
ax.set_ylabel('Engagement rate %')
plt.show()
# In[ ]: | |
# as we can see, customers with medium-size vehicles respond the best across all sales channels, whereas the
# other customers differ slightly in terms of engagement rates across the different sales channels
# In[ ]: | |
# chapter 6 | |
# engagement rates by months since policy inception | |
# In[4]: | |
# Engagement rate (in %) for each value of 'Months Since Policy Inception':
# responders in that month bucket / all customers in that month bucket.
respondedPerMonth = df[df['Response'] == 'Yes'].groupby(
    'Months Since Policy Inception'
)['Response'].count()
customersPerMonth = df.groupby('Months Since Policy Inception')['Response'].count()
byMonthsSinceInceptionDF = respondedPerMonth / customersPerMonth * 100
# In[5]:
# replace missing rates (NaN) with 0
byMonthsSinceInceptionDF = byMonthsSinceInceptionDF.fillna(0)
byMonthsSinceInceptionDF
# In[6]:
ax = byMonthsSinceInceptionDF.fillna(0).plot(
    color='skyblue',
    grid=True,
    title='Engagement rates by months since inception',
    figsize=(10, 7),
)
ax.set_xlabel('Months since policy inception')
ax.set_ylabel('Engagement rates in %')
plt.show()
# In[ ]: | |
# chapter 7 | |
# customer segmentation by customer lifetime value and months since inception | |
# In[7]: | |
# Summary statistics of the customer lifetime value column.
df['Customer Lifetime Value'].describe()
# In[8]:
# Label each customer 'High' when their lifetime value is strictly above the
# column median and 'Low' otherwise. The median is computed once up front so
# the whole column is not re-scanned for every row, and the comparison is
# done on the whole column at once instead of row by row.
clvMedian = df['Customer Lifetime Value'].median()
df['CLV Segment'] = (df['Customer Lifetime Value'] > clvMedian).map(
    {True: 'High', False: 'Low'}
)
# In[ ]: | |
#do the same thing for months since policy inception | |
# In[11]: | |
# Summary statistics of the policy age column.
df['Months Since Policy Inception'].describe()
# In[12]:
# Label each customer 'High' when their policy age is strictly above the
# column median and 'Low' otherwise. The median is computed once up front so
# the whole column is not re-scanned for every row, and the comparison is
# done on the whole column at once instead of row by row.
policyAgeMedian = df['Months Since Policy Inception'].median()
df['Policy Age Segment'] = (df['Months Since Policy Inception'] > policyAgeMedian).map(
    {True: 'High', False: 'Low'}
)
df.head()
# In[19]: | |
# Draw the four (CLV segment, policy-age segment) groups as scatter layers on
# one shared axes, one colour per group.
segmentColors = (
    ('High', 'High', 'red'),
    ('Low', 'High', 'blue'),
    ('High', 'Low', 'orange'),
    ('Low', 'Low', 'green'),
)
ax = None  # the first scatter call creates the axes; later calls reuse it
for clvLabel, ageLabel, pointColor in segmentColors:
    # only the final ('Low', 'Low') layer carries the grid / figure-size options
    if (clvLabel, ageLabel) == ('Low', 'Low'):
        layerOptions = {'grid': True, 'figsize': (10, 7)}
    else:
        layerOptions = {}
    inSegment = (df['CLV Segment'] == clvLabel) & (df['Policy Age Segment'] == ageLabel)
    ax = df.loc[inSegment].plot.scatter(
        ax=ax,
        x='Months Since Policy Inception',
        y='Customer Lifetime Value',
        logy=True,
        color=pointColor,
        **layerOptions,
    )
ax.set_ylabel('CLV (in log scale)')
ax.set_xlabel('Months Since Policy Inception')
ax.set_title('Segments by CLV and Policy Age')
plt.show()
# In[20]: | |
# Engagement rate per (CLV segment, policy-age segment):
# responders in the segment / all customers in the segment.
segmentKeys = ['CLV Segment', 'Policy Age Segment']
respondedPerSegment = df[df['Response'] == 'Yes'].groupby(segmentKeys)['Customer'].count()
customersPerSegment = df.groupby(segmentKeys)['Customer'].count()
engagementRatesBySegmentDF = respondedPerSegment / customersPerSegment
engagementRatesBySegmentDF
# In[22]:
# Pivot the policy-age segment into columns and plot the rates as percentages.
ax = (engagementRatesBySegmentDF.unstack() * 100.0).plot.bar(
    figsize=(10, 7),
    grid=True,
)
ax.set_ylabel('Engagement Rate (%)')
ax.set_title('Engagement Rates by Customer Segments')
plt.show()
# In[ ]: | |
# thank you, this is the end of this tutorial
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment