Skip to content

Instantly share code, notes, and snippets.

@AloyASen
Last active June 19, 2019 15:52
Show Gist options
  • Save AloyASen/5f391b233beec301418f774484a500b8 to your computer and use it in GitHub Desktop.
Save AloyASen/5f391b233beec301418f774484a500b8 to your computer and use it in GitHub Desktop.
IBM Watson Marketing customer Value data set used for customer analytics division at Radii Corporation
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# this is a test simulated run on a pruned customer data where customer analytics is a branch of modeling the ecommerce business
# this is the analytics for the dataset at https://www.kaggle.com/pankajjsh06/ibm-watson-marketing-customer-value-data
# In[1]:
# Enable inline matplotlib rendering when running inside IPython/Jupyter.
# `get_ipython` only exists in an IPython session; when this file is executed
# as a plain Python script the name is undefined, so the magic is skipped
# instead of aborting the whole run with a NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import matplotlib.pyplot as plt
import pandas as pd
# In[ ]:
# The minimal requirements are presumably included in the repository; let's see what is in store.
# In[2]:
from pathlib import Path
# Directory holding the input data, relative to the current working directory.
root = Path('data')
# Read the marketing customer CSV into the DataFrame used by every later cell.
df = pd.read_csv(root.joinpath('marketingCustomer.csv'))
# In[8]:
# The CSV file is now loaded into a pandas DataFrame.
# Next, find the size of the dataset.
# In[9]:
# Dimensions of the loaded data set as (rows, columns).
df.shape
# In[10]:
# Peek at the first rows to get a feel for the available columns.
df.head(n=5)
# In[ ]:
# Analytics on engaged customers: understand how different customers behave
# and react to different marketing strategies, starting with the overall
# engagement rates.
# In[11]:
# Non-null counts of every column, split by the value of 'Response'.
df.groupby('Response').count()
# In[13]:
# Keep only the customer count for each response value.
responseCounts = df.groupby('Response')['Customer'].count()
responseCounts
# In[ ]:
# Visualize the counts in a bar plot.
# In[16]:
ax = responseCounts.plot.bar(
    color='orchid',
    grid=True,
    figsize=(10, 7),
    title='Marketing Engagement',
)
# In[19]:
# Share of each response value among all rows of the data set.
responseCounts / len(df)
# In[23]:
#chapter 2
# engagement rates by offer types presented to the customer
# In[25]:
# Number of responders ('Response' == 'Yes') per renewal offer type.
byOfferTypeDF = df.loc[df['Response'] == 'Yes'].groupby(['Renew Offer Type'])['Customer'].count()
# In[28]:
# Engagement rate = responders / all customers who received that offer type.
# An offer type without a single responder is absent from the numerator, so
# the index-aligned division produces NaN for it. That is a rate of 0, not a
# missing value, so fill it with 0 -- the same treatment the two-level
# breakdowns later in this file apply after unstacking.
erateByOfferType = (
    byOfferTypeDF / df.groupby(['Renew Offer Type'])['Customer'].count()
).fillna(0)
# In[30]:
# Plot the rates as percentages.
ax = (erateByOfferType * 100).plot(kind='bar', figsize=(7, 7), color='dodgerblue', grid=True)
ax.set_ylabel('Engagement Rate %')
plt.show()
# In[ ]:
# chapter 3
# classification by offer type
# how customers with different attributes respond to different marketing messages
# In[10]:
# Responders counted per (offer type, vehicle class) pair.
respondersByOfferAndClass = (
    df[df['Response'] == 'Yes']
    .groupby(['Renew Offer Type', 'Vehicle Class'])['Customer']
    .count()
)
# All customers counted per offer type; this is the denominator, so each value
# below is the share of an offer type's customers who responded AND belong to
# the given vehicle class.
customersPerOffer = df.groupby('Renew Offer Type')['Customer'].count()
byOfferTypeDFrame = respondersByOfferAndClass / customersPerOffer
# In[11]:
byOfferTypeDFrame
# In[ ]:
# Make the previous output easier to read: unstack() pivots the inner index
# level (vehicle class) into columns, and combinations that never occur are
# filled with 0.
# In[12]:
byOfferTypeDFrame = byOfferTypeDFrame.unstack().fillna(0)
# Show the pivoted table.
byOfferTypeDFrame
# In[13]:
# Grouped bar chart of the shares, expressed as percentages.
ax = byOfferTypeDFrame.mul(100).plot.bar(figsize=(10, 7), grid=True)
# In[ ]:
# engagement rates differ by different sales channels
# In[14]:
# Engagement rate per sales channel: responders ('Response' == 'Yes') divided
# by all customers reached through that channel.
# A channel without any responder is missing from the numerator and would come
# out of the index-aligned division as NaN; fill it with 0 so it is reported
# as a 0 rate, matching the two-level channel breakdown later in this file.
bySalesChannelDFrame = (
    df.loc[df['Response'] == 'Yes'].groupby(['Sales Channel'])['Customer'].count()
    / df.groupby('Sales Channel')['Customer'].count()
).fillna(0)
bySalesChannelDFrame
# In[15]:
# Plot the rates as percentages.
ax = (bySalesChannelDFrame * 100).plot(
    kind='bar',
    figsize=(7, 7),
    color='palegreen',
    grid=True,
)
ax.set_ylabel('Engagement rate %')
plt.show()
# In[ ]:
# We can see that agents work better in terms of getting responses from customers.
# Let's break the results down further using more customer attributes.
# In[17]:
# Responders counted per (sales channel, vehicle size) pair.
respondersByChannelAndSize = (
    df[df['Response'] == 'Yes']
    .groupby(['Sales Channel', 'Vehicle Size'])['Customer']
    .count()
)
# All customers per sales channel serve as the denominator, so each value is
# the share of a channel's customers who responded AND have that vehicle size.
customersPerChannel = df.groupby('Sales Channel')['Customer'].count()
bySalesChannelDFrame = respondersByChannelAndSize / customersPerChannel
# Pivot vehicle size into columns for readability; absent combinations become 0.
bySalesChannelDFrame = bySalesChannelDFrame.unstack().fillna(0)
bySalesChannelDFrame
# In[18]:
# Grouped bar chart of the shares, expressed as percentages.
ax = bySalesChannelDFrame.mul(100).plot.bar(figsize=(10, 7), grid=True)
ax.set_ylabel('Engagement rate %')
plt.show()
# In[ ]:
# As we can see, customers with medium-size vehicles respond the best across all sales channels, whereas the
# other customers differ slightly in terms of engagement rates across the different sales channels.
# In[ ]:
# chapter 6
# engagement rates by months since policy inception
# In[4]:
# Responders and all customers, each counted per number of months since the
# policy was started.
respondersPerMonth = (
    df[df['Response'] == 'Yes']
    .groupby('Months Since Policy Inception')['Response']
    .count()
)
customersPerMonth = df.groupby('Months Since Policy Inception')['Response'].count()
# Engagement rate per month value, as a percentage.
byMonthsSinceInceptionDF = respondersPerMonth / customersPerMonth * 100
# In[5]:
# Month values without any responder come out of the division as NaN;
# report them as 0.
byMonthsSinceInceptionDF = byMonthsSinceInceptionDF.fillna(0)
byMonthsSinceInceptionDF
# In[6]:
# Line chart of the rate over policy age (NaNs were already replaced above).
ax = byMonthsSinceInceptionDF.plot(
    kind='line',
    color='skyblue',
    grid=True,
    figsize=(10, 7),
    title='Engagement rates by months since inception',
)
ax.set_xlabel('Months since policy inception')
ax.set_ylabel('Engagement rates in %')
plt.show()
# In[ ]:
# chapter 7
# customer segmentation by customer lifetime value and months since inception
# In[7]:
# Summary statistics of customer lifetime value (CLV).
df['Customer Lifetime Value'].describe()
# In[8]:
# Label a customer 'High' when their CLV is strictly above the median,
# otherwise 'Low'. The median is computed once up front: evaluating it inside
# the per-row lambda rescans the whole column for every row (quadratic work)
# while always producing the same number.
clvMedian = df['Customer Lifetime Value'].median()
df['CLV Segment'] = df['Customer Lifetime Value'].apply(
    lambda x: 'High' if x > clvMedian else 'Low')
# In[ ]:
# Do the same thing for months since policy inception.
# In[11]:
df['Months Since Policy Inception'].describe()
# In[12]:
# Same median split for policy age, again with the median hoisted out of the
# per-row function.
policyAgeMedian = df['Months Since Policy Inception'].median()
df['Policy Age Segment'] = df['Months Since Policy Inception'].apply(
    lambda x: 'High' if x > policyAgeMedian else 'Low')
df.head()
# In[19]:
# Scatter plot of CLV against policy age, one colour per
# (CLV segment, policy age segment) combination, all drawn on the same axes.

# Keyword arguments shared by every scatter call.
scatterArgs = dict(
    x='Months Since Policy Inception',
    y='Customer Lifetime Value',
    logy=True,
)
# (CLV segment, policy age segment, call-specific keyword arguments), in
# drawing order. The grid and figure size are applied with the final layer.
segmentStyles = [
    ('High', 'High', {'color': 'red'}),
    ('Low', 'High', {'color': 'blue'}),
    ('High', 'Low', {'color': 'orange'}),
    ('Low', 'Low', {'color': 'green', 'grid': True, 'figsize': (10, 7)}),
]
# The first call receives ax=None and therefore creates the axes; every later
# call draws onto the axes returned by the previous one.
ax = None
for clvLevel, ageLevel, extraArgs in segmentStyles:
    segmentRows = df.loc[
        (df['CLV Segment'] == clvLevel) & (df['Policy Age Segment'] == ageLevel)
    ]
    ax = segmentRows.plot.scatter(ax=ax, **scatterArgs, **extraArgs)
ax.set_title('Segments by CLV and Policy Age')
ax.set_xlabel('Months Since Policy Inception')
ax.set_ylabel('CLV (in log scale)')
plt.show()
# In[20]:
# Engagement rate for every (CLV segment, policy age segment) combination:
# responders in the segment divided by all customers in the segment.
segmentKeys = ['CLV Segment', 'Policy Age Segment']
respondersPerSegment = df[df['Response'] == 'Yes'].groupby(segmentKeys)['Customer'].count()
customersPerSegment = df.groupby(segmentKeys)['Customer'].count()
engagementRatesBySegmentDF = respondersPerSegment / customersPerSegment
engagementRatesBySegmentDF
# In[22]:
# Pivot the policy age segment into columns and plot the rates as percentages.
ax = engagementRatesBySegmentDF.unstack().mul(100.0).plot.bar(figsize=(10, 7), grid=True)
ax.set_ylabel('Engagement Rate (%)')
ax.set_title('Engagement Rates by Customer Segments')
plt.show()
# In[ ]:
# Thank you, this is the end of this tutorial.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment