karamanbk’s gists

karamanbk / g1_monthly_revenue.py

Last active January 3, 2022 09:20

	#converting the type of Invoice Date Field from string to datetime.
	tx_data['InvoiceDate'] = pd.to_datetime(tx_data['InvoiceDate'])

	#creating YearMonth field for the ease of reporting and visualization
	tx_data['InvoiceYearMonth'] = tx_data['InvoiceDate'].map(lambda date: 100*date.year + date.month)

	#calculate Revenue for each row and create a new dataframe with YearMonth - Revenue columns
	tx_data['Revenue'] = tx_data['UnitPrice'] * tx_data['Quantity']
	tx_revenue = tx_data.groupby(['InvoiceYearMonth'])['Revenue'].sum().reset_index()
	tx_revenue

karamanbk / g1_load_data.py

Last active October 19, 2023 03:59

	# import libraries
	from datetime import datetime, timedelta
	import pandas as pd
	%matplotlib inline
	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns
	from __future__ import division

	import plotly.plotly as py

karamanbk / g1_visual_revenue.py

Last active May 11, 2020 12:12

	#X and Y axis inputs for Plotly graph. We use Scatter for line graphs
	plot_data = [
	go.Scatter(
	x=tx_revenue['InvoiceYearMonth'],
	y=tx_revenue['Revenue'],
	)
	]

	plot_layout = go.Layout(
	xaxis={"type": "category"},

karamanbk / g1_m_revenue_growth.py

Created May 3, 2019 11:30

	#using pct_change() function to see monthly percentage change
	tx_revenue['MonthlyGrowth'] = tx_revenue['Revenue'].pct_change()

	#showing first 5 rows
	tx_revenue.head()

	#visualization - line graph
	plot_data = [
	go.Scatter(
	x=tx_revenue.query("InvoiceYearMonth < 201112")['InvoiceYearMonth'],

karamanbk / g1_m_active_users.py

Last active May 3, 2019 14:46

	#creating a new dataframe with UK customers only
	tx_uk = tx_data.query("Country=='United Kingdom'").reset_index(drop=True)

	#creating monthly active customers dataframe by counting unique Customer IDs
	tx_monthly_active = tx_uk.groupby('InvoiceYearMonth')['CustomerID'].nunique().reset_index()

	#print the dataframe
	tx_monthly_active

	#plotting the output

karamanbk / g1_m_order.py

Created May 3, 2019 13:02

	#create a new dataframe for no. of order by using quantity field
	tx_monthly_sales = tx_uk.groupby('InvoiceYearMonth')['Quantity'].sum().reset_index()

	#print the dataframe
	tx_monthly_sales

	#plot
	plot_data = [
	go.Bar(
	x=tx_monthly_sales['InvoiceYearMonth'],

karamanbk / g1_m_avg_rev.py

Created May 3, 2019 13:10

	# create a new dataframe for average revenue by taking the mean of it
	tx_monthly_order_avg = tx_uk.groupby('InvoiceYearMonth')['Revenue'].mean().reset_index()

	#print the dataframe
	tx_monthly_order_avg

	#plot the bar chart
	plot_data = [
	go.Bar(
	x=tx_monthly_order_avg['InvoiceYearMonth'],

karamanbk / g1_new_user.py

Last active May 3, 2019 14:52

	#create a dataframe contaning CustomerID and first purchase date
	tx_min_purchase = tx_uk.groupby('CustomerID').InvoiceDate.min().reset_index()
	tx_min_purchase.columns = ['CustomerID','MinPurchaseDate']
	tx_min_purchase['MinPurchaseYearMonth'] = tx_min_purchase['MinPurchaseDate'].map(lambda date: 100*date.year + date.month)

	#merge first purchase date column to our main dataframe (tx_uk)
	tx_uk = pd.merge(tx_uk, tx_min_purchase, on='CustomerID')

	tx_uk.head()

karamanbk / g1_new_user_ratio.py

Created May 3, 2019 15:02

	#create a dataframe that shows new user ratio - we also need to drop NA values (first month new user ratio is 0)
	tx_user_ratio = tx_uk.query("UserType == 'New'").groupby(['InvoiceYearMonth'])['CustomerID'].nunique()/tx_uk.query("UserType == 'Existing'").groupby(['InvoiceYearMonth'])['CustomerID'].nunique()
	tx_user_ratio = tx_user_ratio.reset_index()
	tx_user_ratio = tx_user_ratio.dropna()

	#print the dafaframe
	tx_user_ratio

	#plot the result

karamanbk / g1_m_retention.py

Created May 3, 2019 15:57

	#identify which users are active by looking at their revenue per month
	tx_user_purchase = tx_uk.groupby(['CustomerID','InvoiceYearMonth'])['Revenue'].sum().reset_index()

	#create retention matrix with crosstab
	tx_retention = pd.crosstab(tx_user_purchase['CustomerID'], tx_user_purchase['InvoiceYearMonth']).reset_index()

	tx_retention.head()

	#create an array of dictionary which keeps Retained & Total User count for each month
	months = tx_retention.columns[2:]