Skip to content

Instantly share code, notes, and snippets.

library(tidyr)
library(reshape2)
library(ggplot2)
library(corrplot)
# Split numeric and categorical variables.
# vapply() always yields a named logical mask, one entry per column of df1.
is.fact <- vapply(df1, is.factor, logical(1))
# drop = FALSE keeps a data frame even when exactly one column is a factor;
# without it the selection collapses to a bare vector and mutate() fails.
# gather() is called without arguments, so every column (including the
# added `count`) is stacked into key/value pairs.
df1_cat <- df1[, is.fact, drop = FALSE] %>%
  mutate(count = 1) %>%
  gather()
# Remove four columns from df, then keep only rows without missing values.
# Standard indexing replaces subset(), whose non-standard evaluation is meant
# for interactive use; the explicit check still fails fast when one of the
# columns is absent, with a clearer message.
drop_cols <- c("EmployeeCount", "EmployeeID", "Over18", "StandardHours")
stopifnot(all(drop_cols %in% names(df)))
df <- df[, !(names(df) %in% drop_cols), drop = FALSE]
# drop = FALSE guarantees df1 stays a data frame whatever the column count.
df1 <- df[complete.cases(df), , drop = FALSE]
library(dplyr)
# Load the three HR Analytics CSV files.
# stringsAsFactors = TRUE is set explicitly: read.csv() stopped converting
# strings to factors by default in R 4.0.0, and the categorical/numeric split
# elsewhere in this script picks columns with is.factor().
employee_survey_data <- read.csv(
  'Downloads/HR Analytics/employee_survey_data.csv',
  stringsAsFactors = TRUE
)
general_data <- read.csv(
  'Downloads/HR Analytics/general_data.csv',
  stringsAsFactors = TRUE
)
manager_survey_data <- read.csv(
  'Downloads/HR Analytics/manager_survey_data.csv',
  stringsAsFactors = TRUE
)
# Left joins on EmployeeID keep every row of general_data and attach the
# matching columns from both survey tables.
df <- general_data %>%
  left_join(employee_survey_data, by = "EmployeeID") %>%
  left_join(manager_survey_data, by = "EmployeeID")
# Print the structure of the joined data frame.
str(df)
# Plot historical probability of being alive
# NOTE(review): `..` and `....` below are unfilled placeholders, not valid
# Python. Supply a concrete customer id and a time span before running;
# days_since_birth is presumably the number of periods since the customer's
# first purchase — confirm against the plot_history_alive documentation.
customer_id = ..
days_since_birth = ....
# Keep only the transaction rows whose customer_id matches the chosen one.
sp_trans = transaction_data.loc[transaction_data['customer_id'] == customer_id]
# `bgf` is the model object created elsewhere in this file; 'date' is
# presumably the name of the datetime column in sp_trans — TODO confirm.
plot_history_alive(bgf, days_since_birth, sp_trans, 'date')
# Calibration and holdout periods for the data split.
# Each fold starts on 1 April of its start year, its calibration period ends
# on 31 March two years later, and the fold ends one year after that.
start_years = range(2013, 2017)
date_start = [f'{year}-04-01' for year in start_years]
calibration_period_end = [f'{year + 2}-03-31' for year in start_years]
date_end = [f'{year + 3}-03-31' for year in start_years]
# Lists in which the cross-validation results are stored.
accuracies_1y, holdouts_1y, predictions_1y = [], [], []
# Draw the frequency/recency matrix and the probability-alive matrix for the
# model object `bgf`.
# NOTE(review): both calls need `bgf` to exist (and presumably to be fitted)
# already; in this file the fit appears further down — confirm the intended
# execution order.
plot_frequency_recency_matrix(bgf)
plot_probability_alive_matrix(bgf)
# Split the transaction data into a calibration period and a holdout period,
# using monthly periods (freq='M').
cal_hold = calibration_and_holdout_data(
    transactions=trans_dataset,
    customer_id_col='customer_id',
    datetime_col='date',
    calibration_period_end='2018-04-30',  # 2 years calibration
    observation_period_end='2019-04-30',  # 1 year holdout
    freq='M',
)
# Peek at the first rows (only displayed in an interactive session).
cal_hold.head()
# Fit the BetaGeoFitter model on the calibration-period summary.
# calibration_and_holdout_data() suffixes its calibration columns with `_cal`
# (frequency_cal, recency_cal, T_cal); indexing cal_hold with the unsuffixed
# names raises KeyError.
bgf = BetaGeoFitter()
bgf.fit(cal_hold['frequency_cal'], cal_hold['recency_cal'], cal_hold['T_cal'])
# Plot the period-transactions chart for the fitted model.
plot_period_transactions(bgf)
import pandas as pd
import numpy as np
from lifetimes.utils import *
from lifetimes import BetaGeoFitter
from lifetimes.plotting import plot_probability_alive_matrix, plot_frequency_recency_matrix
from lifetimes.generate_data import beta_geometric_nbd_model
import matplotlib.pyplot as plt
from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases, plot_period_transactions,plot_history_alive
# For each of 10 customers, draw a value from Beta(beta_a, beta_b) and build
# an array of nine probabilities from it (transactions 1 through 9).
# NOTE(review): the loop bodies below carry no indentation, so this does not
# parse as Python. The nesting is presumably: outer loop -> (reset p_arr,
# draw beta, inner loop -> (compute, append), convert to array) — confirm and
# re-indent before running.
bgf = BetaGeoFitter()
# Shape parameters of the Beta distribution sampled once per customer.
beta_a = 2
beta_b = 3
for customer in range(0, 10):
p_arr = []
beta = np.random.beta(a=beta_a, b=beta_b)
for transaction in range(1,10):
# Geometric term: beta * (1 - beta) ** (transaction - 1).
proba_inactive = beta*(1-beta)**(transaction-1)
p_arr.append(proba_inactive)
# Replace the Python list with a NumPy array of the collected values.
p_arr = np.array(p_arr)