This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyr) | |
library(reshape2) | |
library(ggplot2) | |
library(corrplot) | |
# Split numeric and categorical variables: flag which columns of df1 are
# factors. vapply() pins the result to a logical vector, so a df1 with zero
# columns yields logical(0) instead of the empty list sapply() would return.
is.fact <- vapply(df1, is.factor, logical(1))
df1_cat <- df1[,is.fact] %>% | |
mutate(count = 1) %>% | |
gather |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove the EmployeeCount, EmployeeID, Over18 and StandardHours columns.
df <- subset(df, select = -c(EmployeeCount, EmployeeID, Over18, StandardHours))
# Keep only the rows that have no missing value in any remaining column.
df1 <- df[complete.cases(df), ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
# Load the three HR Analytics CSV files. The paths are relative, so they are
# resolved against the current working directory.
employee_survey_data <- read.csv(
  "Downloads/HR Analytics/employee_survey_data.csv"
)
general_data <- read.csv("Downloads/HR Analytics/general_data.csv")
manager_survey_data <- read.csv(
  "Downloads/HR Analytics/manager_survey_data.csv"
)

# Attach both survey tables to the general data, matching rows on EmployeeID.
# left_join() keeps every row of general_data even when a survey has no
# matching row (the survey columns are then NA).
df <- general_data %>%
  left_join(employee_survey_data, by = "EmployeeID") %>%
  left_join(manager_survey_data, by = "EmployeeID")

# Print the structure of the combined data frame as a quick sanity check.
str(df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the historical probability of being alive for a single customer.
# NOTE(review): `..` and `....` below are unfilled template placeholders, not
# valid Python -- replace them with a real customer id and a number of days
# before running this snippet.
customer_id = .. | |
days_since_birth = .... | |
# Keep only the rows of transaction_data whose customer_id matches the chosen one.
sp_trans = transaction_data.loc[transaction_data['customer_id'] == customer_id] | |
# Arguments passed, in order: the bgf model object, days_since_birth, the
# customer's transactions, and 'date' (presumably the name of the datetime
# column in sp_trans -- confirm against the lifetimes documentation).
plot_history_alive(bgf, days_since_birth, sp_trans, 'date') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Calibration and holdout periods for the data split.
# Each list has four entries; presumably entry i of every list describes the
# same fold -- confirm against the loop that consumes them. Read that way, a
# fold starts at date_start[i], its calibration window ends two years later at
# calibration_period_end[i], and the fold ends one further year later at
# date_end[i].
date_start = ['2013-04-01', '2014-04-01', '2015-04-01', '2016-04-01']
calibration_period_end = ['2015-03-31', '2016-03-31', '2017-03-31', '2018-03-31']
date_end = ['2016-03-31', '2017-03-31', '2018-03-31', '2019-03-31']

# Lists in which the cross-validation results are collected.
accuracies_1y = []
holdouts_1y = []
predictions_1y = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Draw the two diagnostic matrices for the bgf model: first the
# frequency/recency matrix, then the probability-alive matrix.
plot_frequency_recency_matrix(bgf)
plot_probability_alive_matrix(bgf)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the transaction log into a calibration window and a holdout window.
# 'customer_id' and 'date' are passed as the customer-id and datetime column
# names; freq='M' is forwarded as the time unit.
cal_hold = calibration_and_holdout_data(
    trans_dataset,
    'customer_id',
    'date',
    calibration_period_end='2018-04-30',  # 2 years calibration
    observation_period_end='2019-04-30',  # 1 year holdout
    freq='M',
)
# Preview of the first rows; the result is only displayed in an interactive
# session or notebook.
cal_hold.head()

# Fit the model on the calibration columns only.
# calibration_and_holdout_data suffixes the calibration-period columns with
# "_cal" (frequency_cal, recency_cal, T_cal); the unsuffixed names used
# previously do not exist in cal_hold and raise KeyError.
bgf = BetaGeoFitter()
bgf.fit(cal_hold['frequency_cal'], cal_hold['recency_cal'], cal_hold['T_cal'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the period-transactions diagnostic for the bgf model.
plot_period_transactions(bgf)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from lifetimes.utils import * | |
from lifetimes import BetaGeoFitter | |
from lifetimes.plotting import plot_probability_alive_matrix, plot_frequency_recency_matrix | |
from lifetimes.generate_data import beta_geometric_nbd_model | |
import matplotlib.pyplot as plt | |
from lifetimes.plotting import plot_calibration_purchases_vs_holdout_purchases, plot_period_transactions,plot_history_alive | |
# Create a BetaGeoFitter with its default settings; it is not fitted here.
bgf = BetaGeoFitter()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shape parameters of the Beta distribution each customer's probability is
# drawn from.
beta_a = 2
beta_b = 3

# For each of 10 simulated customers, draw one probability `beta` and compute
# beta * (1 - beta) ** (t - 1) for transactions t = 1..9.
# NOTE(review): the original indentation was lost; the nesting below (array
# conversion once per customer, after the inner loop) is reconstructed from
# the statement order -- confirm against the full script.
for customer in range(0, 10):
    p_arr = []
    beta = np.random.beta(a=beta_a, b=beta_b)
    for transaction in range(1, 10):
        # Geometric form: (1 - beta) applied for the first t - 1 transactions,
        # then beta at transaction t.
        proba_inactive = beta * (1 - beta) ** (transaction - 1)
        p_arr.append(proba_inactive)
    p_arr = np.array(p_arr)
NewerOlder