Last active
October 1, 2022 19:43
-
-
Save TimTeaFan/c9f74f22ea1559ab9ad85edc26a5c809 to your computer and use it in GitHub Desktop.
many model approach with multi-level data and models
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(gapminder) | |
# lists of independent variables | |
indep_var_country <- list( | |
"gdp_only" = "gdpPercap", | |
"gdp_and_pop" = c("pop", "gdpPercap") | |
) | |
indep_var_continent <- append( | |
indep_var_country, | |
list("gpd_pop_country" = c("pop", "gdpPercap", "country")) | |
) | |
indep_var_world <- append( | |
indep_var_continent, | |
list("gpd_pop_cont" = c("pop", "gdpPercap", "continent")) | |
) | |
# create base dataset with two extra grouping columns starting with `grp_` | |
gap_base <- gapminder %>% | |
mutate(grp_continent = continent, | |
grp_country = country, | |
.before = 1L) | |
# dataset for country levels expanded with indep vars on country level | |
gap_country <- gap_base %>% | |
expand_grid(indep_vars = indep_var_country) | |
# dataset for continent levels expanded with indep vars on continent level | |
gap_continent <- gap_base %>% | |
mutate(grp_country = grp_continent) %>% | |
expand_grid(indep_vars = indep_var_continent) | |
# dataset for world expanded with indep vars on world level | |
gap_world <- gap_base %>% | |
mutate(grp_country = "World", | |
grp_continent = "World") %>% | |
expand_grid(indep_vars = indep_var_world) | |
# lets bind the datasets together and nest and arrange them | |
gap_nested <- bind_rows( | |
"country" = gap_country, | |
"continent" = gap_continent, | |
"world" = gap_world, | |
.id = "level" | |
) %>% | |
# lets add the names of the model groups as column | |
mutate(mod_group = names(indep_vars)) %>% | |
# lets nest_by and proceed rowwise | |
nest_by(level, mod_group, grp_continent, grp_country, indep_vars) %>% | |
# show "world" and "continent" level models first | |
# and show model groups in order from gdp_only to gpd_pop_country_cont | |
arrange(level != "world", | |
level == "country", | |
nchar(mod_group)) | |
gap_res <- gap_nested %>% | |
mutate( | |
# create models using the list of characters in indep_vars | |
mod = list(lm(reformulate(indep_vars, "lifeExp"), | |
data = data)), | |
# get model estimates | |
res = list(broom::tidy(mod)), | |
# get model statistics | |
stats = list(broom::glance(mod)) | |
) %>% | |
# ungroup and only select relevant columns | |
ungroup() %>% | |
select(level, mod_group, grp_continent, grp_country, res, stats) | |
# Create a tibble with model statistics | |
gap_stats <- gap_res %>% | |
unnest(stats) %>% | |
select(-res) | |
# Create a tibble with model estimates and p-values | |
gap_estimates <- gap_res %>% | |
unnest(res) %>% | |
filter(term != "(Intercept)") %>% | |
select(-stats) | |
# from here on we can filter and extract the model data and choose what we want to plot | |
# Example: which independent variables work best on which level on average: | |
gap_stats %>% | |
group_by(level, mod_group) %>% | |
summarise(adjr2 = mean(adj.r.squared)) %>% | |
arrange(desc(adjr2)) | |
# A tibble: 9 × 3 | |
# Groups: level [3] | |
# level mod_group adjr2 | |
# <chr> <chr> <dbl> | |
# 1 country gdp_and_pop 0.859 | |
# 2 world gpd_pop_country 0.770 | |
# 3 continent gpd_pop_country 0.691 | |
# 4 country gdp_only 0.598 | |
# 5 world gpd_pop_cont 0.581 | |
# 6 continent gdp_and_pop 0.441 | |
# 7 continent gdp_only 0.431 | |
# 8 world gdp_and_pop 0.346 | |
# 9 world gdp_only 0.340 | |
# ... | |
# to be continued |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment