This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#converting the type of Invoice Date Field from string to datetime. | |
tx_data['InvoiceDate'] = pd.to_datetime(tx_data['InvoiceDate']) | |
#creating YearMonth field for the ease of reporting and visualization | |
tx_data['InvoiceYearMonth'] = tx_data['InvoiceDate'].map(lambda date: 100*date.year + date.month) | |
#calculate Revenue for each row and create a new dataframe with YearMonth - Revenue columns | |
tx_data['Revenue'] = tx_data['UnitPrice'] * tx_data['Quantity'] | |
tx_revenue = tx_data.groupby(['InvoiceYearMonth'])['Revenue'].sum().reset_index() | |
tx_revenue |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import libraries | |
from datetime import datetime, timedelta | |
import pandas as pd | |
%matplotlib inline | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn as sns | |
from __future__ import division | |
import plotly.plotly as py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#X and Y axis inputs for Plotly graph. We use Scatter for line graphs | |
plot_data = [ | |
go.Scatter( | |
x=tx_revenue['InvoiceYearMonth'], | |
y=tx_revenue['Revenue'], | |
) | |
] | |
plot_layout = go.Layout( | |
xaxis={"type": "category"}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#using pct_change() function to see monthly percentage change | |
tx_revenue['MonthlyGrowth'] = tx_revenue['Revenue'].pct_change() | |
#showing first 5 rows | |
tx_revenue.head() | |
#visualization - line graph | |
plot_data = [ | |
go.Scatter( | |
x=tx_revenue.query("InvoiceYearMonth < 201112")['InvoiceYearMonth'], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#creating a new dataframe with UK customers only | |
tx_uk = tx_data.query("Country=='United Kingdom'").reset_index(drop=True) | |
#creating monthly active customers dataframe by counting unique Customer IDs | |
tx_monthly_active = tx_uk.groupby('InvoiceYearMonth')['CustomerID'].nunique().reset_index() | |
#print the dataframe | |
tx_monthly_active | |
#plotting the output |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#create a new dataframe for no. of order by using quantity field | |
tx_monthly_sales = tx_uk.groupby('InvoiceYearMonth')['Quantity'].sum().reset_index() | |
#print the dataframe | |
tx_monthly_sales | |
#plot | |
plot_data = [ | |
go.Bar( | |
x=tx_monthly_sales['InvoiceYearMonth'], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create a new dataframe for average revenue by taking the mean of it | |
tx_monthly_order_avg = tx_uk.groupby('InvoiceYearMonth')['Revenue'].mean().reset_index() | |
#print the dataframe | |
tx_monthly_order_avg | |
#plot the bar chart | |
plot_data = [ | |
go.Bar( | |
x=tx_monthly_order_avg['InvoiceYearMonth'], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#create a dataframe contaning CustomerID and first purchase date | |
tx_min_purchase = tx_uk.groupby('CustomerID').InvoiceDate.min().reset_index() | |
tx_min_purchase.columns = ['CustomerID','MinPurchaseDate'] | |
tx_min_purchase['MinPurchaseYearMonth'] = tx_min_purchase['MinPurchaseDate'].map(lambda date: 100*date.year + date.month) | |
#merge first purchase date column to our main dataframe (tx_uk) | |
tx_uk = pd.merge(tx_uk, tx_min_purchase, on='CustomerID') | |
tx_uk.head() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#create a dataframe that shows new user ratio - we also need to drop NA values (first month new user ratio is 0) | |
tx_user_ratio = tx_uk.query("UserType == 'New'").groupby(['InvoiceYearMonth'])['CustomerID'].nunique()/tx_uk.query("UserType == 'Existing'").groupby(['InvoiceYearMonth'])['CustomerID'].nunique() | |
tx_user_ratio = tx_user_ratio.reset_index() | |
tx_user_ratio = tx_user_ratio.dropna() | |
#print the dafaframe | |
tx_user_ratio | |
#plot the result |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#identify which users are active by looking at their revenue per month | |
tx_user_purchase = tx_uk.groupby(['CustomerID','InvoiceYearMonth'])['Revenue'].sum().reset_index() | |
#create retention matrix with crosstab | |
tx_retention = pd.crosstab(tx_user_purchase['CustomerID'], tx_user_purchase['InvoiceYearMonth']).reset_index() | |
tx_retention.head() | |
#create an array of dictionary which keeps Retained & Total User count for each month | |
months = tx_retention.columns[2:] |
OlderNewer