TimTeaFan · October 1, 2022 19:43
diff --git a/many_models_advanced.R b/many_models_advanced.R
 library(tidyverse)
 library(gapminder)

 # lists of independent variables
 indep_var_country <- list(
  "gdp_only" = "gdpPercap",
  "gdp_and_pop"  = c("pop", "gdpPercap")
  )

 indep_var_continent <- append(
  indep_var_country,
  list("gpd_pop_country" = c("pop", "gdpPercap", "country"))
 )

 indep_var_world <- append(
  indep_var_continent,
  list("gpd_pop_cont" = c("pop", "gdpPercap", "continent"))
 )

 # create base dataset with two extra grouping columns starting with `grp_`
 gap_base <- gapminder %>% 
  mutate(grp_continent = continent, 
         grp_country = country,
         .before = 1L)

 # dataset for country levels expanded with indep vars on country level
 gap_country <- gap_base %>% 
  expand_grid(indep_vars = indep_var_country)

 # dataset for continent levels expanded with indep vars on continent level
 gap_continent <- gap_base %>% 
  mutate(grp_country = grp_continent) %>% 
  expand_grid(indep_vars = indep_var_continent)

 # dataset for world expanded with indep vars on world level
 gap_world <- gap_base %>% 
  mutate(grp_country = "World",
         grp_continent  = "World") %>% 
  expand_grid(indep_vars = indep_var_world)

 # lets bind the datasets together and nest and arrange them
 gap_nested <- bind_rows(
  "country" = gap_country,
  "continent" = gap_continent,
  "world" = gap_world,
  .id = "level"
 ) %>% 
  # lets add the names of the model groups as column
  mutate(mod_group = names(indep_vars)) %>% 
  # lets nest_by and proceed rowwise
  nest_by(level, mod_group, grp_continent, grp_country, indep_vars) %>% 
  # show "world" and "continent" level models first
  # and show model groups in order from gdp_only to gpd_pop_country_cont
  arrange(level != "world",
          level == "country",
          nchar(mod_group)) 

 gap_res <- gap_nested %>% 
  mutate(
    # create models using the list of characters in indep_vars
    mod = list(lm(reformulate(indep_vars, "lifeExp"),
                       data = data)),
    # get model estimates
    res = list(broom::tidy(mod)),
    # get model statistics
    stats = list(broom::glance(mod))
  ) %>%
  # ungroup and only select relevant columns
  ungroup() %>% 
  select(level, mod_group, grp_continent, grp_country, res, stats)

 # Create a tibble with model statistics
 gap_stats <- gap_res %>% 
  unnest(stats) %>% 
  select(-res)

 # Create a tibble with model estimates and p-values
 gap_estimates <- gap_res %>% 
  unnest(res) %>% 
  filter(term != "(Intercept)") %>% 
  select(-stats)

 # from here on we can filter and extract the model data and choose what we want to plot  

 # Example: which independent variables work best on which level on average:
 gap_stats %>% 
  group_by(level, mod_group) %>% 
  summarise(adjr2 = mean(adj.r.squared)) %>% 
  arrange(desc(adjr2))
  
 # A tibble: 9 × 3
 # Groups:   level [3]
 # level     mod_group       adjr2
 # <chr>     <chr>           <dbl>
 # 1 country   gdp_and_pop     0.859
 # 2 world     gpd_pop_country 0.770
 # 3 continent gpd_pop_country 0.691
 # 4 country   gdp_only        0.598
 # 5 world     gpd_pop_cont    0.581
 # 6 continent gdp_and_pop     0.441
 # 7 continent gdp_only        0.431
 # 8 world     gdp_and_pop     0.346
 # 9 world     gdp_only        0.340

 # ...
 # to be continued
	library(tidyverse)
	library(gapminder)

	# lists of independent variables
	indep_var_country <- list(
	"gdp_only" = "gdpPercap",
	"gdp_and_pop" = c("pop", "gdpPercap")
	)

	indep_var_continent <- append(
	indep_var_country,
	list("gpd_pop_country" = c("pop", "gdpPercap", "country"))
	)

	indep_var_world <- append(
	indep_var_continent,
	list("gpd_pop_cont" = c("pop", "gdpPercap", "continent"))
	)

	# create base dataset with two extra grouping columns starting with `grp_`
	gap_base <- gapminder %>%
	mutate(grp_continent = continent,
	grp_country = country,
	.before = 1L)

	# dataset for country levels expanded with indep vars on country level
	gap_country <- gap_base %>%
	expand_grid(indep_vars = indep_var_country)

	# dataset for continent levels expanded with indep vars on continent level
	gap_continent <- gap_base %>%
	mutate(grp_country = grp_continent) %>%
	expand_grid(indep_vars = indep_var_continent)

	# dataset for world expanded with indep vars on world level
	gap_world <- gap_base %>%
	mutate(grp_country = "World",
	grp_continent = "World") %>%
	expand_grid(indep_vars = indep_var_world)

	# lets bind the datasets together and nest and arrange them
	gap_nested <- bind_rows(
	"country" = gap_country,
	"continent" = gap_continent,
	"world" = gap_world,
	.id = "level"
	) %>%
	# lets add the names of the model groups as column
	mutate(mod_group = names(indep_vars)) %>%
	# lets nest_by and proceed rowwise
	nest_by(level, mod_group, grp_continent, grp_country, indep_vars) %>%
	# show "world" and "continent" level models first
	# and show model groups in order from gdp_only to gpd_pop_country_cont
	arrange(level != "world",
	level == "country",
	nchar(mod_group))

	gap_res <- gap_nested %>%
	mutate(
	# create models using the list of characters in indep_vars
	mod = list(lm(reformulate(indep_vars, "lifeExp"),
	data = data)),
	# get model estimates
	res = list(broom::tidy(mod)),
	# get model statistics
	stats = list(broom::glance(mod))
	) %>%
	# ungroup and only select relevant columns
	ungroup() %>%
	select(level, mod_group, grp_continent, grp_country, res, stats)

	# Create a tibble with model statistics
	gap_stats <- gap_res %>%
	unnest(stats) %>%
	select(-res)

	# Create a tibble with model estimates and p-values
	gap_estimates <- gap_res %>%
	unnest(res) %>%
	filter(term != "(Intercept)") %>%
	select(-stats)

	# from here on we can filter and extract the model data and choose what we want to plot

	# Example: which independent variables work best on which level on average:
	gap_stats %>%
	group_by(level, mod_group) %>%
	summarise(adjr2 = mean(adj.r.squared)) %>%
	arrange(desc(adjr2))

	# A tibble: 9 × 3
	# Groups: level [3]
	# level mod_group adjr2
	# <chr> <chr> <dbl>
	# 1 country gdp_and_pop 0.859
	# 2 world gpd_pop_country 0.770
	# 3 continent gpd_pop_country 0.691
	# 4 country gdp_only 0.598
	# 5 world gpd_pop_cont 0.581
	# 6 continent gdp_and_pop 0.441
	# 7 continent gdp_only 0.431
	# 8 world gdp_and_pop 0.346
	# 9 world gdp_only 0.340

	# ...
	# to be continued