Skip to content

Instantly share code, notes, and snippets.

# Total Quantity per InvoiceYearMonth in tx_uk: sum the Quantity column
# within each month group, then turn the group key back into a regular
# column so the result is a two-column dataframe.
tx_monthly_sales = (
    tx_uk
    .groupby('InvoiceYearMonth')['Quantity']
    .sum()
    .reset_index()
)
# Bare expression: displays the dataframe when it is the last statement of
# a notebook cell.
tx_monthly_sales
#plot
plot_data = [
go.Bar(
x=tx_monthly_sales['InvoiceYearMonth'],
# Keep only the rows whose Country is 'United Kingdom' and renumber the
# index from 0 (the old index is discarded, not kept as a column).
tx_uk = (
    tx_data
    .query("Country=='United Kingdom'")
    .reset_index(drop=True)
)
# Monthly active customers: the number of distinct CustomerID values seen
# in each InvoiceYearMonth.
tx_monthly_active = (
    tx_uk
    .groupby('InvoiceYearMonth')['CustomerID']
    .nunique()
    .reset_index()
)
# Bare expression: displays the dataframe when it is the last statement of
# a notebook cell.
tx_monthly_active
#plotting the output
# Month-over-month growth: fractional change of Revenue relative to the
# immediately preceding row (periods=1 is the pandas default, spelled out
# here for clarity).
tx_revenue['MonthlyGrowth'] = tx_revenue['Revenue'].pct_change(periods=1)
# Preview the first five rows (n=5 is the default for head()).
tx_revenue.head(n=5)
#visualization - line graph
plot_data = [
go.Scatter(
x=tx_revenue.query("InvoiceYearMonth < 201112")['InvoiceYearMonth'],
# Trace list for the Plotly figure. A Scatter trace is used to draw the
# line graph: invoice month on the x axis, revenue on the y axis.
plot_data = [
    go.Scatter(
        x=tx_revenue['InvoiceYearMonth'],
        y=tx_revenue['Revenue'],
    ),
]
plot_layout = go.Layout(
xaxis={"type": "category"},
# import libraries
from datetime import datetime, timedelta
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from __future__ import division
import plotly.plotly as py
# Convert the InvoiceDate column from strings to pandas datetimes so its
# date parts can be read through the .dt accessor.
tx_data['InvoiceDate'] = pd.to_datetime(tx_data['InvoiceDate'])
# Build a single sortable year-month key of the form YYYYMM
# (e.g. December 2011 -> 201112) for reporting and visualization.
# Computed column-wise with .dt rather than calling a Python lambda once
# per row; the resulting values are the same.
tx_data['InvoiceYearMonth'] = (
    100 * tx_data['InvoiceDate'].dt.year + tx_data['InvoiceDate'].dt.month
)
# Per-row revenue is the unit price multiplied by the quantity on that row.
tx_data['Revenue'] = tx_data['UnitPrice'] * tx_data['Quantity']
# Monthly revenue: sum Revenue within each InvoiceYearMonth and return the
# group key as a regular column (columns: InvoiceYearMonth, Revenue).
tx_revenue = (
    tx_data
    .groupby(['InvoiceYearMonth'])['Revenue']
    .sum()
    .reset_index()
)
# Bare expression: displays the dataframe when it is the last statement of
# a notebook cell.
tx_revenue