# Skip to content
#
# Instantly share code, notes, and snippets.

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import poisson,expon,nbinom
# Poisson transaction-count model: probability of observing exactly
# 0, 1, ..., 9 transactions when the Poisson mean is `poisson_lambda`.
poisson_lambda = 4.3
distribution = poisson(poisson_lambda)
# pmf(k) is P(X = k); one entry per transaction count k in 0..9.
p_arr = [distribution.pmf(transactions) for transactions in range(10)]
# Gamma-Poisson heterogeneity: each simulated customer gets an individual
# Poisson rate drawn from Gamma(shape=gamma_shape, scale=gamma_scale), and the
# resulting transaction-count PMF is drawn as one thin black line.
gamma_shape = 9
gamma_scale = 0.5
for customer in range(100):
    # One random Poisson rate per customer.
    distribution = poisson(np.random.gamma(shape=gamma_shape, scale=gamma_scale))
    # PMF evaluated for 0..8 transactions.
    p_arr = [distribution.pmf(transactions) for transactions in range(9)]
    # NOTE(review): the plot call is assumed to belong inside the customer
    # loop (one curve per customer); otherwise only the last draw is shown.
    plt.plot(p_arr, color='black', linewidth=0.7, zorder=1)
# Geometric drop-out curve: for a fixed probability `p`, the value for the
# i-th transaction is p * (1 - p) ** (i - 1), i = 1..9.
p = 0.52
# Start at i = 1 so the exponent is never negative and the number of points
# matches the x-values range(1, 10) passed to plt.plot below (plot requires
# x and y of equal length).
p_arr = np.array([p * (1 - p) ** (i - 1) for i in range(1, 10)])
# Renormalise so the truncated curve (9 terms) sums to 1.
p_arr /= p_arr.sum()
plt.plot(range(1, 10), p_arr, color='black', linewidth=0.7, zorder=1)
# Beta-geometric heterogeneity: each simulated customer gets an individual
# probability drawn from Beta(a=beta_a, b=beta_b); the geometric curve
# beta * (1 - beta) ** (transaction - 1) is then evaluated for
# transaction = 1..9.
beta_a = 2
beta_b = 3
for customer in range(10):
    # One random probability per customer.
    beta = np.random.beta(a=beta_a, b=beta_b)
    p_arr = np.array(
        [beta * (1 - beta) ** (transaction - 1) for transaction in range(1, 10)]
    )
    # NOTE(review): the per-customer curve is computed but never plotted or
    # stored, so only the last customer's `p_arr` survives the loop. A
    # plt.plot(...) call was probably intended here -- confirm.
import pandas as pd
import numpy as np
from lifetimes.utils import *
from lifetimes import BetaGeoFitter
from lifetimes.plotting import plot_probability_alive_matrix, plot_frequency_recency_matrix
from lifetimes.generate_data import beta_geometric_nbd_model
import matplotlib.pyplot as plt
from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases, plot_period_transactions,plot_history_alive
# Split the transaction log into a calibration period (used to fit the model)
# and a holdout period, summarised per customer.
# NOTE(review): `trans_dataset` is not defined in the visible part of this
# file (a later snippet uses `transaction_data`); it is presumably a
# DataFrame with 'customer_id' and 'date' columns -- confirm.
cal_hold = calibration_and_holdout_data(
    trans_dataset,
    'customer_id',
    'date',
    calibration_period_end='2018-04-30',  # 2 years calibration
    observation_period_end='2019-04-30',  # 1 year holdout
    freq='M',
)
# Only displays something when it is the last expression of a notebook cell.
cal_hold.head()

# calibration_and_holdout_data suffixes the calibration-period summary columns
# with `_cal` (and the holdout ones with `_holdout`), so the model is fitted
# on frequency_cal / recency_cal / T_cal.
bgf = BetaGeoFitter()
bgf.fit(cal_hold['frequency_cal'], cal_hold['recency_cal'], cal_hold['T_cal'])

# The diagnostics read the fitted model (its data and parameters), so they
# must run after fit(); plotting an unfitted BetaGeoFitter fails.
plot_period_transactions(bgf)
plot_frequency_recency_matrix(bgf)
plot_probability_alive_matrix(bgf)
# Calibration and holdout periods for the data split.
# Each fold starts on 1 April of its start year, is calibrated until 31 March
# two years later, and ends on 31 March three years later.
_fold_start_years = range(2013, 2017)
date_start = ['{}-04-01'.format(year) for year in _fold_start_years]
calibration_period_end = ['{}-03-31'.format(year + 2) for year in _fold_start_years]
date_end = ['{}-03-31'.format(year + 3) for year in _fold_start_years]

# Result containers for the cross validation (three independent lists).
accuracies_1y, holdouts_1y, predictions_1y = [], [], []
# Plot the historical probability of being alive for a single customer.
# TODO: replace both `...` placeholders with real values before running.
customer_id = ...  # value to match in the 'customer_id' column
days_since_birth = ...  # passed to plot_history_alive as its second argument
if customer_id is Ellipsis or days_since_birth is Ellipsis:
    # Fail with a clear message instead of plotting an empty selection.
    raise ValueError(
        "Set customer_id and days_since_birth before plotting the "
        "probability-alive history."
    )
# NOTE(review): `transaction_data` is not defined in the visible part of this
# file (an earlier snippet uses `trans_dataset`) -- confirm which name holds
# the raw transaction log.
sp_trans = transaction_data.loc[transaction_data['customer_id'] == customer_id]
plot_history_alive(bgf, days_since_birth, sp_trans, 'date')