Skip to content

Instantly share code, notes, and snippets.

@jrosell
Last active October 23, 2024 10:35
Show Gist options
  • Save jrosell/226b1f15ceaea5b6d077d9238022e5ae to your computer and use it in GitHub Desktop.
Save jrosell/226b1f15ceaea5b6d077d9238022e5ae to your computer and use it in GitHub Desktop.
# Original code: https://tim-tiefenbach.de/post/2023-dplyr-many-models/
# Loading the packages and getting the data ------------------------------------
if (!rlang::is_installed("pak")) install.packages("pak")
rlang::check_installed(c(
"pak",
"dplyr",
"tidyr",
"broom",
"modelsummary",
"purrr",
"dplyover",
"rlang"
))
library(dplyr)
library(tidyr)
library(broom)
library(rlang)
library(modelsummary)
library(purrr)
library(dplyover)
csat_named <- csatraw |>
rename(any_of(set_names(names(csatraw), names(csat)))) |>
select(
cust_id, type, product, csat,
ends_with("rating")
)
# Analyzing the subgrups -------------------------------------------------------
csat_prod_nested <-
csat_named |>
nest_by(product) |>
print()
base_formula <- csat ~ postal_rating + phone_rating + email_rating +
website_rating + shop_rating
csat_prod_nested_res <- csat_prod_nested |>
mutate(
mod = list(lm(base_formula, data = data)),
modstat = list(broom::glance(mod)),
res = list(broom::tidy(mod))
) |>
print()
csat_prod_nested_res |>
select(product, modstat) |>
unnest(modstat) |>
select(r.squared, p.value, nobs) |>
print()
csat_prod_nested_res |>
select(product, res) |>
unnest(res) |>
filter(term != "(Intercept)") |>
print()
# Analyzing the subgrups and all the data --------------------------------------
csat_all <- csat_named |>
mutate(product = "All") |>
bind_rows(csat_named)
csat_all |> count(product)
csat_all_res <- csat_all |>
nest_by(product) |>
mutate(
mod = list(lm(base_formula, data = data)),
res = list(broom::tidy(mod)),
modstat = list(broom::glance(mod))
) |>
print()
csat_all_res |>
select(product, modstat) |>
unnest(modstat) |>
select(r.squared, p.value, nobs) |>
print()
csat_all_res |>
select(product, res) |>
unnest(res) |>
filter(term != "(Intercept)") |>
print()
# Create additional subgroups that meet specific filter criteria ---------------
filter_ls <- list(
All = TRUE,
no_reactivate = expr(type != "reactivate")
)
csat_all_grps <- csat_all |>
nest_by(product) |>
expand_grid(filter_ls) |>
mutate(
type = names(filter_ls),
.after = product
) |>
rowwise() |>
mutate(data = list(
filter(data, eval(filter_ls))
),
.keep = "unused"
) |>
print()
csat_all_grps_grid <- csat_all_grps |>
mutate(
mod = list(lm(base_formula, data = data)),
res = list(broom::tidy(mod)),
modstat = list(broom::glance(mod))
) |>
print()
csat_all_grps_grid |>
select(product, type, modstat) |>
unnest(modstat) |>
select(-c(sigma, statistic, df:df.residual)) |>
print()
csat_all_grps_grid |>
select(product, type, res) |>
unnest(res) |>
filter(term == "website_rating")
csat_all_grps_grid$modstat[4]
# Dynamically name list elements -----------------------------------------------
csat_all_grps_grid <- csat_all_grps |>
rowwise() |>
mutate(
mod = list2("{product}_{type}" := lm(base_formula, data = data)),
res = list2("{product}_{type}" := broom::tidy(mod)),
modstat = list2("{product}_{type}" := broom::glance(mod))
) |>
print()
csat_all_grps_grid$modstat
# Data-less grids: Multiple filters --------------------------------------------
filter_ls <- list(
All = TRUE,
no_reactivate = expr(type != "reactivate")
)
product <- c(
"All", unique(csat_named$product)
)
all_grps_grid <- expand_grid(product, filter_ls) |>
mutate(type = names(filter_ls),
.after = product)
all_grps_grid
all_grps_grid_mod <- all_grps_grid |>
rowwise() |>
mutate(mod = list(
lm(
base_formula,
data = filter(
csat_named,
.env$product == "All" | .env$product == product,
eval(filter_ls)
)
)
)
) |>
select(!filter_ls) |>
print()
all_grps_grid_res <- all_grps_grid_mod |>
mutate(
res = list(broom::tidy(mod)),
modstat = list(broom::glance(mod))
) |>
print()
all_grps_grid_res |>
select(product, type, modstat) |>
unnest(modstat) |>
select(-c(sigma, statistic, df:df.residual))|>
print()
# Create a separate model for each independent variable ------------------------
indep_vars <- c("postal_rating",
"phone_rating",
"email_rating",
"website_rating",
"shop_rating")
all_grps_grid_vars <- all_grps_grid |>
expand_grid(indep_vars) |>
print()
all_grps_grid_vars_mod <- all_grps_grid_vars |>
rowwise() |>
mutate(mod = list(
lm(reformulate(indep_vars, "csat"), # <- this part is new
data = filter(csat_named,
.env$product == "All" | .env$product == product,
eval(filter_ls)
)
)
)
) |>
select(!filter_ls) |>
print()
# Update a baseline model with additional variables ----------------------------
min_formula <- csat ~ postal_rating + phone_rating + shop_rating
update_vars <- list(
base = NULL,
email = "email_rating",
website = "website_rating",
both = c("email_rating", "website_rating"),
both_plus_interaction = "email_rating*website_rating"
)
all_grid_upd_vars <- all_grps_grid |>
expand_grid(update_vars) |>
mutate(
model_spec = names(update_vars),
.after = type
) |>
print()
all_grid_upd_vars_form <- all_grid_upd_vars |>
rowwise() |>
mutate(
form = list(
update(
min_formula, # old formula
reformulate(c(".", update_vars)) # changes to formula
)
),
mod = list2( "{product}_{type}_{model_spec}" :=
lm(
form,
data = filter(
csat_named,
.env$product == "All" | .env$product == product,
eval(filter_ls)
)
)
)
) |>
print()
head(all_grid_upd_vars_form$mod, 2)
# Save the results -------------------------------------------------------------
out <- modelsummary(
models = all_grid_upd_vars_form$mod,
output = "model_results.xlsx",
gof_omit = "AIC|BIC|Log.Lik|RMSE",
coef_omit = "(Intercept)",
stars = TRUE,
statistic = NULL
)
out <- modelsummary(models = all_grid_upd_vars_form$mod,
output = "data.frame",
gof_omit = "AIC|BIC|Log.Lik|RMSE",
coef_omit = "(Intercept)",
statistic = "stars") |>
mutate(statistic = ifelse(statistic == "", "estimate", statistic)) |>
select(-part) |>
pivot_wider(
names_from = statistic,
values_from = -c(term, statistic),
values_fn = as.numeric
)
openxlsx::write.xlsx(out, "model_results2.xlsx")
# Compare two different versions of our dependent variable ---------------------
csat_named_top <- csat_named |>
mutate(csat_top = ifelse(csat == 5, 1, 0))
dep_vars <- c("csat", "csat_top")
all_grps_grid_final <- all_grid_upd_vars |>
expand_grid(dep_vars) |>
print()
all_grps_grid_final_res <- all_grps_grid_final |>
rowwise() |>
mutate(
form = list2(
"{product}_{type}_{model_spec}_{dep_vars}" :=
update(min_formula, reformulate(c(".", update_vars), dep_vars))
),
mod = list(
lm(
form,
data = filter(
csat_named_top,
.env$product == "All" | .env$product == product,
eval(filter_ls)
)
)
),
res = list(broom::tidy(mod)),
modstat = list(broom::glance(mod))
) |>
select(product:model_spec, dep_vars, mod:modstat) |>
print()
# Significant at the 10% level by descending adjusted r-squared ----------------
all_grps_grid_final_res |>
unnest(modstat) |>
select(-c(sigma, statistic, df:df.residual)) |>
filter(p.value < 0.1) |>
arrange(desc(adj.r.squared))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment