AloyASen · June 19, 2019 15:52
diff --git a/Customer_Prediction.py b/Customer_Prediction.py
 #!/usr/bin/env python
 # coding: utf-8

 # In[ ]:


 # This is a simulated test run on a pruned customer dataset; customer analytics is a branch of modeling the e-commerce business.
 # The analysis uses the dataset at https://www.kaggle.com/pankajjsh06/ibm-watson-marketing-customer-value-data

 # In[1]:


 get_ipython().run_line_magic('matplotlib', 'inline')
 import matplotlib.pyplot as plt
 import pandas as pd


 # In[ ]:


 # The minimal requirements have been added to the repository, I guess! Let's see what is in store.


 # In[2]:


 from pathlib import Path
 # Directory holding the dataset, relative to the current working directory.
 root = Path('data')
 # Read the marketing customer CSV into a DataFrame.
 df = pd.read_csv(root.joinpath('marketingCustomer.csv'))


 # In[8]:


 # The CSV file is now loaded into a pandas DataFrame.
 # Next, find the size of the dataset.


 # In[9]:


 # Number of rows and columns in the dataset.
 df.shape


 # In[10]:


 # Preview the first rows.
 df.head()


 # In[ ]:


 # Now look at the engaged customers, to understand how different customers
 # behave and react to different marketing strategies.

 # --- starting with the overall engagement rates


 # In[11]:


 # Non-null count of every column, split by the value of 'Response'.
 df.groupby('Response').count()


 # In[13]:


 # Keep only the 'Customer' count. Selecting the column before aggregating
 # avoids counting every other column just to throw the result away.

 df.groupby('Response')['Customer'].count()


 # In[ ]:


 # Visualize this in a bar plot.


 # In[16]:


 ax = df.groupby('Response')['Customer'].count().plot(
     kind='bar', color='orchid', grid=True, figsize=(10, 7),
     title='Marketing Engagement')


 # In[19]:


 # Share of engaged and non-engaged customers, as a fraction of all rows
 # (multiply by 100 for a percentage).

 df.groupby('Response')['Customer'].count() / df.shape[0]


 # In[23]:


 # chapter 2

 # Engagement rates by the offer type presented to the customer.


 # In[25]:


 # Customers who responded 'Yes', counted per renew offer type.
 # The column is selected before counting so only 'Customer' is aggregated.
 byOfferTypeDF = df.loc[df['Response'] == 'Yes'].groupby(
     ['Renew Offer Type'])['Customer'].count()


 # In[28]:


 # Engagement rate = responders / all customers, per renew offer type.
 erateByOfferType = byOfferTypeDF / df.groupby(
     ['Renew Offer Type'])['Customer'].count()


 # In[30]:


 # Plot the rate as a percentage.
 ax = (erateByOfferType * 100).plot(
     kind='bar', figsize=(7, 7), color='dodgerblue', grid=True)

 ax.set_ylabel('Engagement Rate %')
 plt.show()


 # In[ ]:


 # chapter 3

 # Breakdown by offer type: how customers with different attributes respond
 # to different marketing messages.


 # In[10]:


 # Responders per (offer type, vehicle class), divided by the total number of
 # customers for the offer type. The single-level denominator is aligned to
 # the two-level numerator on the shared 'Renew Offer Type' index level.
 # The column is selected before counting so only 'Customer' is aggregated.
 byOfferTypeDFrame = df.loc[df['Response'] == 'Yes'].groupby(
     ['Renew Offer Type', 'Vehicle Class'])['Customer'].count() / df.groupby(
     'Renew Offer Type')['Customer'].count()


 # In[11]:


 byOfferTypeDFrame


 # In[ ]:


 # Make the previous output easier to read: unstack() pivots the inner index
 # level (vehicle class) into columns.


 # In[12]:


 # Combinations missing from the result come out as NaN; show them as 0.
 byOfferTypeDFrame = byOfferTypeDFrame.unstack().fillna(0)

 # Display the table.

 byOfferTypeDFrame


 # In[13]:


 ax = (byOfferTypeDFrame * 100).plot(kind='bar', figsize=(10, 7), grid=True)


 # In[ ]:


 # Engagement rates by sales channel.


 # In[14]:


 # Responders / all customers, per sales channel. The column is selected
 # before counting so only 'Customer' is aggregated.
 bySalesChannelDFrame = df.loc[df['Response'] == 'Yes'].groupby(
     ['Sales Channel'])['Customer'].count() / df.groupby(
     'Sales Channel')['Customer'].count()
 bySalesChannelDFrame


 # In[15]:


 # Plot the rate as a percentage.
 ax = (bySalesChannelDFrame * 100).plot(
     kind='bar',
     figsize=(7, 7),
     color='palegreen',
     grid=True)
 ax.set_ylabel('Engagement rate %')
 plt.show()


 # In[ ]:


 # We can see that agents work better in terms of getting responses from customers.

 # Let's break the results down further with more customer attributes.


 # In[17]:


 # Responders per (sales channel, vehicle size), divided by the total number
 # of customers for the sales channel. The column is selected before counting
 # so only 'Customer' is aggregated.
 bySalesChannelDFrame = df.loc[df['Response'] == 'Yes'].groupby(
     ['Sales Channel', 'Vehicle Size'])['Customer'].count() / df.groupby(
     'Sales Channel')['Customer'].count()

 # Unstack the data into a more readable format: one column per vehicle size.
 bySalesChannelDFrame = bySalesChannelDFrame.unstack().fillna(0)
 bySalesChannelDFrame


 # In[18]:


 ax = (bySalesChannelDFrame * 100).plot(
     kind='bar',
     figsize=(10, 7),
     grid=True)
 ax.set_ylabel('Engagement rate %')
 plt.show()


 # In[ ]:


 # As we can see, customers with medium-size vehicles respond the best across all sales channels, whereas the
 # other customers differ slightly in engagement rate across the different sales channels.


 # In[ ]:


 # chapter 6

 # Engagement rates by months since policy inception.


 # In[4]:


 # Percentage of customers who responded 'Yes', per policy age in months.
 byMonthsSinceInceptionDF = df.loc[df['Response'] == 'Yes'].groupby(
     by='Months Since Policy Inception')['Response'].count() / df.groupby(
     by='Months Since Policy Inception')['Response'].count() * 100


 # In[5]:


 # Policy ages missing from the numerator come out as NaN; treat them as 0 %.
 byMonthsSinceInceptionDF = byMonthsSinceInceptionDF.fillna(0)

 byMonthsSinceInceptionDF


 # In[6]:


 # NaNs were already replaced in the previous cell, so plot the series as is.
 ax = byMonthsSinceInceptionDF.plot(
     figsize=(10, 7),
     title='Engagement rates by months since inception',
     grid=True,
     color='skyblue')

 ax.set_xlabel('Months since policy inception')
 ax.set_ylabel('Engagement rates in %')

 plt.show()


 # In[ ]:


 # chapter 7

 # Customer segmentation by customer lifetime value and months since inception.


 # In[7]:


 df['Customer Lifetime Value'].describe()


 # In[8]:


 # Label each customer 'High' when their CLV is strictly above the median,
 # otherwise 'Low'. The median is computed once up front; evaluating it inside
 # the lambda would recompute it for every row.
 clvMedian = df['Customer Lifetime Value'].median()
 df["CLV Segment"] = df['Customer Lifetime Value'].apply(
     lambda x: 'High' if x > clvMedian else 'Low')


 # In[ ]:


 # Do the same thing for months since policy inception.


 # In[11]:


 df['Months Since Policy Inception'].describe()


 # In[12]:


 # Same rule as for CLV: strictly above the median is 'High', otherwise 'Low'.
 policyAgeMedian = df['Months Since Policy Inception'].median()
 df['Policy Age Segment'] = df['Months Since Policy Inception'].apply(
     lambda x: 'High' if x > policyAgeMedian else 'Low')
 df.head()


 # In[19]:


 # One scatter layer per (CLV segment, policy-age segment) pair, all drawn on
 # the same axes. The last layer also carries the grid and figure-size options.
 segmentLayers = [
     ('High', 'High', {'color': 'red'}),
     ('Low', 'High', {'color': 'blue'}),
     ('High', 'Low', {'color': 'orange'}),
     ('Low', 'Low', {'color': 'green', 'grid': True, 'figsize': (10, 7)}),
 ]
 ax = None
 for clvLabel, ageLabel, layerOptions in segmentLayers:
     inSegment = (df['CLV Segment'] == clvLabel) & (df['Policy Age Segment'] == ageLabel)
     layerAxes = df.loc[inSegment].plot.scatter(
         ax=ax,
         x='Months Since Policy Inception',
         y='Customer Lifetime Value',
         logy=True,
         **layerOptions)
     if ax is None:
         # The first layer creates the axes that the later layers are drawn onto.
         ax = layerAxes

 ax.set_ylabel('CLV (in log scale)')
 ax.set_xlabel('Months Since Policy Inception')
 ax.set_title('Segments by CLV and Policy Age')
 plt.show()


 # In[20]:


 # Engagement rate (responders / all customers) for each combination of
 # CLV segment and policy-age segment. The column is selected before counting
 # so only 'Customer' is aggregated.
 engagementRatesBySegmentDF = df.loc[df['Response'] == 'Yes'].groupby(
     ['CLV Segment', 'Policy Age Segment'])['Customer'].count() / df.groupby(
     ['CLV Segment', 'Policy Age Segment'])['Customer'].count()

 engagementRatesBySegmentDF


 # In[22]:


 # unstack() moves the inner level (policy-age segment) into columns, giving
 # one group of bars per CLV segment; values are shown as percentages.
 ax = (engagementRatesBySegmentDF.unstack() * 100.0).plot(
     kind='bar', figsize=(10, 7), grid=True)
 ax.set_ylabel('Engagement Rate (%)')
 ax.set_title('Engagement Rates by Customer Segments')
 plt.show()


 # In[ ]:


 # Thank you, this is the end of this tutorial.
	#!/usr/bin/env python
	# coding: utf-8

	# In[ ]:


	# This is a simulated test run on a pruned customer dataset; customer analytics is a branch of modeling the e-commerce business.
	# The analysis uses the dataset at https://www.kaggle.com/pankajjsh06/ibm-watson-marketing-customer-value-data

	# In[1]:


	get_ipython().run_line_magic('matplotlib', 'inline')
	import matplotlib.pyplot as plt
	import pandas as pd


	# In[ ]:


	# The minimal requirements have been added to the repository, I guess! Let's see what is in store.


	# In[2]:


	from pathlib import Path
	# Directory holding the dataset, relative to the current working directory.
	root = Path('data')
	# Read the marketing customer CSV into a DataFrame.
	df = pd.read_csv(root.joinpath('marketingCustomer.csv'))


	# In[8]:


	# The CSV file is now loaded into a pandas DataFrame.
	# Next, find the size of the dataset.


	# In[9]:


	# Number of rows and columns in the dataset.
	df.shape


	# In[10]:


	# Preview the first rows.
	df.head()


	# In[ ]:


	# Now look at the engaged customers, to understand how different customers
	# behave and react to different marketing strategies.

	# --- starting with the overall engagement rates


	# In[11]:


	# Non-null count of every column, split by the value of 'Response'.
	df.groupby('Response').count()


	# In[13]:


	# Keep only the 'Customer' count. Selecting the column before aggregating
	# avoids counting every other column just to throw the result away.

	df.groupby('Response')['Customer'].count()


	# In[ ]:


	# Visualize this in a bar plot.


	# In[16]:


	ax = df.groupby('Response')['Customer'].count().plot(
	    kind='bar', color='orchid', grid=True, figsize=(10, 7),
	    title='Marketing Engagement')


	# In[19]:


	# Share of engaged and non-engaged customers, as a fraction of all rows
	# (multiply by 100 for a percentage).

	df.groupby('Response')['Customer'].count() / df.shape[0]


	# In[23]:


	# chapter 2

	# Engagement rates by the offer type presented to the customer.


	# In[25]:


	# Customers who responded 'Yes', counted per renew offer type.
	# The column is selected before counting so only 'Customer' is aggregated.
	byOfferTypeDF = df.loc[df['Response'] == 'Yes'].groupby(
	    ['Renew Offer Type'])['Customer'].count()


	# In[28]:


	# Engagement rate = responders / all customers, per renew offer type.
	erateByOfferType = byOfferTypeDF / df.groupby(
	    ['Renew Offer Type'])['Customer'].count()


	# In[30]:


	# Plot the rate as a percentage.
	ax = (erateByOfferType * 100).plot(
	    kind='bar', figsize=(7, 7), color='dodgerblue', grid=True)

	ax.set_ylabel('Engagement Rate %')
	plt.show()


	# In[ ]:


	# chapter 3

	# Breakdown by offer type: how customers with different attributes respond
	# to different marketing messages.


	# In[10]:


	# Responders per (offer type, vehicle class), divided by the total number of
	# customers for the offer type. The single-level denominator is aligned to
	# the two-level numerator on the shared 'Renew Offer Type' index level.
	# The column is selected before counting so only 'Customer' is aggregated.
	byOfferTypeDFrame = df.loc[df['Response'] == 'Yes'].groupby(
	    ['Renew Offer Type', 'Vehicle Class'])['Customer'].count() / df.groupby(
	    'Renew Offer Type')['Customer'].count()


	# In[11]:


	byOfferTypeDFrame


	# In[ ]:


	# Make the previous output easier to read: unstack() pivots the inner index
	# level (vehicle class) into columns.


	# In[12]:


	# Combinations missing from the result come out as NaN; show them as 0.
	byOfferTypeDFrame = byOfferTypeDFrame.unstack().fillna(0)

	# Display the table.

	byOfferTypeDFrame


	# In[13]:


	ax = (byOfferTypeDFrame * 100).plot(kind='bar', figsize=(10, 7), grid=True)


	# In[ ]:


	# Engagement rates by sales channel.


	# In[14]:


	# Responders / all customers, per sales channel. The column is selected
	# before counting so only 'Customer' is aggregated.
	bySalesChannelDFrame = df.loc[df['Response'] == 'Yes'].groupby(
	    ['Sales Channel'])['Customer'].count() / df.groupby(
	    'Sales Channel')['Customer'].count()
	bySalesChannelDFrame


	# In[15]:


	# Plot the rate as a percentage.
	ax = (bySalesChannelDFrame * 100).plot(
	    kind='bar',
	    figsize=(7, 7),
	    color='palegreen',
	    grid=True)
	ax.set_ylabel('Engagement rate %')
	plt.show()


	# In[ ]:


	# We can see that agents work better in terms of getting responses from customers.

	# Let's break the results down further with more customer attributes.


	# In[17]:


	# Responders per (sales channel, vehicle size), divided by the total number
	# of customers for the sales channel. The column is selected before counting
	# so only 'Customer' is aggregated.
	bySalesChannelDFrame = df.loc[df['Response'] == 'Yes'].groupby(
	    ['Sales Channel', 'Vehicle Size'])['Customer'].count() / df.groupby(
	    'Sales Channel')['Customer'].count()

	# Unstack the data into a more readable format: one column per vehicle size.
	bySalesChannelDFrame = bySalesChannelDFrame.unstack().fillna(0)
	bySalesChannelDFrame


	# In[18]:


	ax = (bySalesChannelDFrame * 100).plot(
	    kind='bar',
	    figsize=(10, 7),
	    grid=True)
	ax.set_ylabel('Engagement rate %')
	plt.show()


	# In[ ]:


	# As we can see, customers with medium-size vehicles respond the best across all sales channels, whereas the
	# other customers differ slightly in engagement rate across the different sales channels.


	# In[ ]:


	# chapter 6

	# Engagement rates by months since policy inception.


	# In[4]:


	# Percentage of customers who responded 'Yes', per policy age in months.
	byMonthsSinceInceptionDF = df.loc[df['Response'] == 'Yes'].groupby(
	    by='Months Since Policy Inception')['Response'].count() / df.groupby(
	    by='Months Since Policy Inception')['Response'].count() * 100


	# In[5]:


	# Policy ages missing from the numerator come out as NaN; treat them as 0 %.
	byMonthsSinceInceptionDF = byMonthsSinceInceptionDF.fillna(0)

	byMonthsSinceInceptionDF


	# In[6]:


	# NaNs were already replaced in the previous cell, so plot the series as is.
	ax = byMonthsSinceInceptionDF.plot(
	    figsize=(10, 7),
	    title='Engagement rates by months since inception',
	    grid=True,
	    color='skyblue')

	ax.set_xlabel('Months since policy inception')
	ax.set_ylabel('Engagement rates in %')

	plt.show()


	# In[ ]:


	# chapter 7

	# Customer segmentation by customer lifetime value and months since inception.


	# In[7]:


	df['Customer Lifetime Value'].describe()


	# In[8]:


	# Label each customer 'High' when their CLV is strictly above the median,
	# otherwise 'Low'. The median is computed once up front; evaluating it inside
	# the lambda would recompute it for every row.
	clvMedian = df['Customer Lifetime Value'].median()
	df["CLV Segment"] = df['Customer Lifetime Value'].apply(
	    lambda x: 'High' if x > clvMedian else 'Low')


	# In[ ]:


	# Do the same thing for months since policy inception.


	# In[11]:


	df['Months Since Policy Inception'].describe()


	# In[12]:


	# Same rule as for CLV: strictly above the median is 'High', otherwise 'Low'.
	policyAgeMedian = df['Months Since Policy Inception'].median()
	df['Policy Age Segment'] = df['Months Since Policy Inception'].apply(
	    lambda x: 'High' if x > policyAgeMedian else 'Low')
	df.head()


	# In[19]:


	# One scatter layer per (CLV segment, policy-age segment) pair, all drawn on
	# the same axes. The last layer also carries the grid and figure-size options.
	segmentLayers = [
	    ('High', 'High', {'color': 'red'}),
	    ('Low', 'High', {'color': 'blue'}),
	    ('High', 'Low', {'color': 'orange'}),
	    ('Low', 'Low', {'color': 'green', 'grid': True, 'figsize': (10, 7)}),
	]
	ax = None
	for clvLabel, ageLabel, layerOptions in segmentLayers:
	    inSegment = (df['CLV Segment'] == clvLabel) & (df['Policy Age Segment'] == ageLabel)
	    layerAxes = df.loc[inSegment].plot.scatter(
	        ax=ax,
	        x='Months Since Policy Inception',
	        y='Customer Lifetime Value',
	        logy=True,
	        **layerOptions)
	    if ax is None:
	        # The first layer creates the axes that the later layers are drawn onto.
	        ax = layerAxes

	ax.set_ylabel('CLV (in log scale)')
	ax.set_xlabel('Months Since Policy Inception')
	ax.set_title('Segments by CLV and Policy Age')
	plt.show()


	# In[20]:


	# Engagement rate (responders / all customers) for each combination of
	# CLV segment and policy-age segment. The column is selected before counting
	# so only 'Customer' is aggregated.
	engagementRatesBySegmentDF = df.loc[df['Response'] == 'Yes'].groupby(
	    ['CLV Segment', 'Policy Age Segment'])['Customer'].count() / df.groupby(
	    ['CLV Segment', 'Policy Age Segment'])['Customer'].count()

	engagementRatesBySegmentDF


	# In[22]:


	# unstack() moves the inner level (policy-age segment) into columns, giving
	# one group of bars per CLV segment; values are shown as percentages.
	ax = (engagementRatesBySegmentDF.unstack() * 100.0).plot(
	    kind='bar', figsize=(10, 7), grid=True)
	ax.set_ylabel('Engagement Rate (%)')
	ax.set_title('Engagement Rates by Customer Segments')
	plt.show()


	# In[ ]:


	# Thank you, this is the end of this tutorial.