Last active
October 23, 2024 10:35
-
-
Save jrosell/226b1f15ceaea5b6d077d9238022e5ae to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Original code: https://tim-tiefenbach.de/post/2023-dplyr-many-models/ | |
# Loading the packages and getting the data ------------------------------------ | |
if (!rlang::is_installed("pak")) install.packages("pak") | |
rlang::check_installed(c( | |
"pak", | |
"dplyr", | |
"tidyr", | |
"broom", | |
"modelsummary", | |
"purrr", | |
"dplyover", | |
"rlang" | |
)) | |
library(dplyr) | |
library(tidyr) | |
library(broom) | |
library(rlang) | |
library(modelsummary) | |
library(purrr) | |
library(dplyover) | |
csat_named <- csatraw |> | |
rename(any_of(set_names(names(csatraw), names(csat)))) |> | |
select( | |
cust_id, type, product, csat, | |
ends_with("rating") | |
) | |
# Analyzing the subgrups ------------------------------------------------------- | |
csat_prod_nested <- | |
csat_named |> | |
nest_by(product) |> | |
print() | |
base_formula <- csat ~ postal_rating + phone_rating + email_rating + | |
website_rating + shop_rating | |
csat_prod_nested_res <- csat_prod_nested |> | |
mutate( | |
mod = list(lm(base_formula, data = data)), | |
modstat = list(broom::glance(mod)), | |
res = list(broom::tidy(mod)) | |
) |> | |
print() | |
csat_prod_nested_res |> | |
select(product, modstat) |> | |
unnest(modstat) |> | |
select(r.squared, p.value, nobs) |> | |
print() | |
csat_prod_nested_res |> | |
select(product, res) |> | |
unnest(res) |> | |
filter(term != "(Intercept)") |> | |
print() | |
# Analyzing the subgrups and all the data -------------------------------------- | |
csat_all <- csat_named |> | |
mutate(product = "All") |> | |
bind_rows(csat_named) | |
csat_all |> count(product) | |
csat_all_res <- csat_all |> | |
nest_by(product) |> | |
mutate( | |
mod = list(lm(base_formula, data = data)), | |
res = list(broom::tidy(mod)), | |
modstat = list(broom::glance(mod)) | |
) |> | |
print() | |
csat_all_res |> | |
select(product, modstat) |> | |
unnest(modstat) |> | |
select(r.squared, p.value, nobs) |> | |
print() | |
csat_all_res |> | |
select(product, res) |> | |
unnest(res) |> | |
filter(term != "(Intercept)") |> | |
print() | |
# Create additional subgroups that meet specific filter criteria --------------- | |
filter_ls <- list( | |
All = TRUE, | |
no_reactivate = expr(type != "reactivate") | |
) | |
csat_all_grps <- csat_all |> | |
nest_by(product) |> | |
expand_grid(filter_ls) |> | |
mutate( | |
type = names(filter_ls), | |
.after = product | |
) |> | |
rowwise() |> | |
mutate(data = list( | |
filter(data, eval(filter_ls)) | |
), | |
.keep = "unused" | |
) |> | |
print() | |
csat_all_grps_grid <- csat_all_grps |> | |
mutate( | |
mod = list(lm(base_formula, data = data)), | |
res = list(broom::tidy(mod)), | |
modstat = list(broom::glance(mod)) | |
) |> | |
print() | |
csat_all_grps_grid |> | |
select(product, type, modstat) |> | |
unnest(modstat) |> | |
select(-c(sigma, statistic, df:df.residual)) |> | |
print() | |
csat_all_grps_grid |> | |
select(product, type, res) |> | |
unnest(res) |> | |
filter(term == "website_rating") | |
csat_all_grps_grid$modstat[4] | |
# Dynamically name list elements ----------------------------------------------- | |
csat_all_grps_grid <- csat_all_grps |> | |
rowwise() |> | |
mutate( | |
mod = list2("{product}_{type}" := lm(base_formula, data = data)), | |
res = list2("{product}_{type}" := broom::tidy(mod)), | |
modstat = list2("{product}_{type}" := broom::glance(mod)) | |
) |> | |
print() | |
csat_all_grps_grid$modstat | |
# Data-less grids: Multiple filters -------------------------------------------- | |
filter_ls <- list( | |
All = TRUE, | |
no_reactivate = expr(type != "reactivate") | |
) | |
product <- c( | |
"All", unique(csat_named$product) | |
) | |
all_grps_grid <- expand_grid(product, filter_ls) |> | |
mutate(type = names(filter_ls), | |
.after = product) | |
all_grps_grid | |
all_grps_grid_mod <- all_grps_grid |> | |
rowwise() |> | |
mutate(mod = list( | |
lm( | |
base_formula, | |
data = filter( | |
csat_named, | |
.env$product == "All" | .env$product == product, | |
eval(filter_ls) | |
) | |
) | |
) | |
) |> | |
select(!filter_ls) |> | |
print() | |
all_grps_grid_res <- all_grps_grid_mod |> | |
mutate( | |
res = list(broom::tidy(mod)), | |
modstat = list(broom::glance(mod)) | |
) |> | |
print() | |
all_grps_grid_res |> | |
select(product, type, modstat) |> | |
unnest(modstat) |> | |
select(-c(sigma, statistic, df:df.residual))|> | |
print() | |
# Create a separate model for each independent variable ------------------------ | |
indep_vars <- c("postal_rating", | |
"phone_rating", | |
"email_rating", | |
"website_rating", | |
"shop_rating") | |
all_grps_grid_vars <- all_grps_grid |> | |
expand_grid(indep_vars) |> | |
print() | |
all_grps_grid_vars_mod <- all_grps_grid_vars |> | |
rowwise() |> | |
mutate(mod = list( | |
lm(reformulate(indep_vars, "csat"), # <- this part is new | |
data = filter(csat_named, | |
.env$product == "All" | .env$product == product, | |
eval(filter_ls) | |
) | |
) | |
) | |
) |> | |
select(!filter_ls) |> | |
print() | |
# Update a baseline model with additional variables ---------------------------- | |
min_formula <- csat ~ postal_rating + phone_rating + shop_rating | |
update_vars <- list( | |
base = NULL, | |
email = "email_rating", | |
website = "website_rating", | |
both = c("email_rating", "website_rating"), | |
both_plus_interaction = "email_rating*website_rating" | |
) | |
all_grid_upd_vars <- all_grps_grid |> | |
expand_grid(update_vars) |> | |
mutate( | |
model_spec = names(update_vars), | |
.after = type | |
) |> | |
print() | |
all_grid_upd_vars_form <- all_grid_upd_vars |> | |
rowwise() |> | |
mutate( | |
form = list( | |
update( | |
min_formula, # old formula | |
reformulate(c(".", update_vars)) # changes to formula | |
) | |
), | |
mod = list2( "{product}_{type}_{model_spec}" := | |
lm( | |
form, | |
data = filter( | |
csat_named, | |
.env$product == "All" | .env$product == product, | |
eval(filter_ls) | |
) | |
) | |
) | |
) |> | |
print() | |
head(all_grid_upd_vars_form$mod, 2) | |
# Save the results ------------------------------------------------------------- | |
out <- modelsummary( | |
models = all_grid_upd_vars_form$mod, | |
output = "model_results.xlsx", | |
gof_omit = "AIC|BIC|Log.Lik|RMSE", | |
coef_omit = "(Intercept)", | |
stars = TRUE, | |
statistic = NULL | |
) | |
out <- modelsummary(models = all_grid_upd_vars_form$mod, | |
output = "data.frame", | |
gof_omit = "AIC|BIC|Log.Lik|RMSE", | |
coef_omit = "(Intercept)", | |
statistic = "stars") |> | |
mutate(statistic = ifelse(statistic == "", "estimate", statistic)) |> | |
select(-part) |> | |
pivot_wider( | |
names_from = statistic, | |
values_from = -c(term, statistic), | |
values_fn = as.numeric | |
) | |
openxlsx::write.xlsx(out, "model_results2.xlsx") | |
# Compare two different versions of our dependent variable --------------------- | |
csat_named_top <- csat_named |> | |
mutate(csat_top = ifelse(csat == 5, 1, 0)) | |
dep_vars <- c("csat", "csat_top") | |
all_grps_grid_final <- all_grid_upd_vars |> | |
expand_grid(dep_vars) |> | |
print() | |
all_grps_grid_final_res <- all_grps_grid_final |> | |
rowwise() |> | |
mutate( | |
form = list2( | |
"{product}_{type}_{model_spec}_{dep_vars}" := | |
update(min_formula, reformulate(c(".", update_vars), dep_vars)) | |
), | |
mod = list( | |
lm( | |
form, | |
data = filter( | |
csat_named_top, | |
.env$product == "All" | .env$product == product, | |
eval(filter_ls) | |
) | |
) | |
), | |
res = list(broom::tidy(mod)), | |
modstat = list(broom::glance(mod)) | |
) |> | |
select(product:model_spec, dep_vars, mod:modstat) |> | |
print() | |
# Significant at the 10% level by descending adjusted r-squared ---------------- | |
all_grps_grid_final_res |> | |
unnest(modstat) |> | |
select(-c(sigma, statistic, df:df.residual)) |> | |
filter(p.value < 0.1) |> | |
arrange(desc(adj.r.squared)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment