USMortality · December 27, 2024 20:19
diff --git a/asmr_country_dataset.r b/asmr_country_dataset.r
 library(readr)
 library(dplyr)
 library(tsibble)
 library(stringr)
 library(fable)
 library(tidyr)
 library(lubridate)
 library(countrycode)

 options(vsc.dev.args = list(width = 600 * 2, height = 600 * 2, res = 72 * 2))

 # Get Excess Mortality from Mortality.Watch relative to a 3y prepandemic avg.
 mortality <- read_csv(
  "https://s3.mortality.watch/data/mortality/world_yearly.csv"
 ) |>
  filter(
    date >= 2017,
    !is.na(asmr_who),
    !str_starts(iso3c, "USA-"),
    !str_starts(iso3c, "DEU-"),
    iso3c != "LIE"
  ) |>
  as_tsibble(index = date, key = iso3c)

 # Simple 3y avg model.
 model <- mortality |>
  filter(date >= 2017 & date <= 2019) |>
  model(lm = TSLM(asmr_who))

 # Get the fitted values for the pre-2020 period using augment()
 fitted_values <- model |>
  augment() |>
  select(iso3c, date, .fitted)

 # Get the forecasted values with prediction intervals for the next 4 years
 forecasts <- model |>
  forecast(h = "4 years", level = 95) |>
  mutate(hl = hilo(asmr_who, level = 95)) |>
  unpack_hilo(cols = hl) |>
  as_tibble() |>
  select(iso3c, date, .mean, hl_lower, hl_upper)

 # Combine the fitted values with the original data for the pre-2020 period
 ts_augmented <- mortality |> left_join(fitted_values, by = c("iso3c", "date"))

 # Combine the forecast values with the dataset
 ts_with_forecast <- ts_augmented |>
  select(iso3c, date, asmr_who) |>
  left_join(forecasts, by = c("iso3c", "date"))

 df_mortality <- ts_with_forecast |>
  mutate(
    date = as.integer(date),
    asmr_who_ex_p = asmr_who / .mean - 1
  ) |>
  filter(date >= 2020) |>
  select(iso3c, date, asmr_who_ex_p) |>
  as_tibble()


 # Our World In Data
 owid <- read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")

 df_owid <- owid |>
  select(
    iso_code, date, stringency_index, population_density,
    median_age, aged_65_older, aged_70_older, gdp_per_capita, extreme_poverty,
    cardiovasc_death_rate, diabetes_prevalence, female_smokers, male_smokers,
    hospital_beds_per_thousand, life_expectancy,
    human_development_index, people_vaccinated_per_hundred,
    total_vaccinations_per_hundred, people_vaccinated_per_hundred,
    people_fully_vaccinated_per_hundred, total_boosters_per_hundred
  ) |>
  arrange(iso_code, date) |>
  mutate(date = year(date)) |>
  group_by(iso_code) |>
  fill(total_vaccinations_per_hundred, people_vaccinated_per_hundred,
    people_fully_vaccinated_per_hundred, total_boosters_per_hundred,
    .direction = "down"
  ) |>
  replace_na(list(
    total_vaccinations_per_hundred = 0,
    people_vaccinated_per_hundred = 0,
    people_fully_vaccinated_per_hundred = 0,
    total_boosters_per_hundred = 0
  )) |>
  group_by(iso_code, date) |>
  summarize(
    stringency_index = sum(stringency_index, na.rm = TRUE),
    population_density = mean(population_density, na.rm = TRUE),
    median_age = mean(median_age, na.rm = TRUE),
    aged_65_older = mean(aged_65_older, na.rm = TRUE),
    aged_70_older = mean(aged_70_older, na.rm = TRUE),
    gdp_per_capita = mean(gdp_per_capita, na.rm = TRUE),
    extreme_poverty = mean(extreme_poverty, na.rm = TRUE),
    cardiovasc_death_rate = mean(cardiovasc_death_rate, na.rm = TRUE),
    diabetes_prevalence = mean(diabetes_prevalence, na.rm = TRUE),
    female_smokers = mean(female_smokers, na.rm = TRUE),
    male_smokers = mean(male_smokers, na.rm = TRUE),
    hospital_beds_per_thousand = mean(hospital_beds_per_thousand, na.rm = TRUE),
    life_expectancy = mean(life_expectancy, na.rm = TRUE),
    human_development_index = mean(human_development_index, na.rm = TRUE),
    total_vaccinations_per_hundred = max(
      total_vaccinations_per_hundred,
      na.rm = TRUE
    ),
    people_vaccinated_per_hundred = max(
      people_vaccinated_per_hundred,
      na.rm = TRUE
    ),
    people_fully_vaccinated_per_hundred = max(
      people_fully_vaccinated_per_hundred,
      na.rm = TRUE
    ),
    total_boosters_per_hundred = max(
      total_boosters_per_hundred,
      na.rm = TRUE
    ),
    .groups = "drop"
  )


 # CIA World Factbook
 apify_base <- "https://apify.mortality.watch/cia-world-factbook/"
 obesity <- read_csv(paste0(apify_base, "obesity-adult-prevalence-rate.csv"))
 alcohol <- read_csv(paste0(apify_base, "alcohol-consumption-per-capita.csv"))
 tobacco <- read_csv(paste0(apify_base, "tobacco-use.csv"))

 # Add ISO3C code column to each dataset
 obesity$iso3c <- countrycode(obesity$name, "country.name", "iso3c")
 alcohol$iso3c <- countrycode(alcohol$name, "country.name", "iso3c")
 tobacco$iso3c <- countrycode(tobacco$name, "country.name", "iso3c")

 # Standardize the `%` values and add type columns
 obesity_clean <- obesity |>
  mutate(obesity = obesity$`%` / 100) |>
  select(iso3c, obesity)

 alcohol_clean <- alcohol |>
  mutate(alcohol = alcohol$`liters of pure alcohol`) |>
  select(iso3c, alcohol)

 tobacco_clean <- tobacco |>
  mutate(tobacco = tobacco$`%` / 100) |>
  select(iso3c, tobacco)

 df_cia <- obesity_clean |>
  full_join(alcohol_clean, by = "iso3c") |>
  full_join(tobacco_clean, by = "iso3c")


 # Final dataset
 df <- df_mortality |>
  inner_join(df_owid, by = join_by(iso3c == iso_code, date)) |>
  inner_join(df_cia, by = join_by(iso3c))
	library(readr)
	library(dplyr)
	library(tsibble)
	library(stringr)
	library(fable)
	library(tidyr)
	library(lubridate)
	library(countrycode)

	options(vsc.dev.args = list(width = 600 * 2, height = 600 * 2, res = 72 * 2))

	# Get Excess Mortality from Mortality.Watch relative to a 3y prepandemic avg.
	mortality <- read_csv(
	"https://s3.mortality.watch/data/mortality/world_yearly.csv"
	) \|>
	filter(
	date >= 2017,
	!is.na(asmr_who),
	!str_starts(iso3c, "USA-"),
	!str_starts(iso3c, "DEU-"),
	iso3c != "LIE"
	) \|>
	as_tsibble(index = date, key = iso3c)

	# Simple 3y avg model.
	model <- mortality \|>
	filter(date >= 2017 & date <= 2019) \|>
	model(lm = TSLM(asmr_who))

	# Get the fitted values for the pre-2020 period using augment()
	fitted_values <- model \|>
	augment() \|>
	select(iso3c, date, .fitted)

	# Get the forecasted values with prediction intervals for the next 4 years
	forecasts <- model \|>
	forecast(h = "4 years", level = 95) \|>
	mutate(hl = hilo(asmr_who, level = 95)) \|>
	unpack_hilo(cols = hl) \|>
	as_tibble() \|>
	select(iso3c, date, .mean, hl_lower, hl_upper)

	# Combine the fitted values with the original data for the pre-2020 period
	ts_augmented <- mortality \|> left_join(fitted_values, by = c("iso3c", "date"))

	# Combine the forecast values with the dataset
	ts_with_forecast <- ts_augmented \|>
	select(iso3c, date, asmr_who) \|>
	left_join(forecasts, by = c("iso3c", "date"))

	df_mortality <- ts_with_forecast \|>
	mutate(
	date = as.integer(date),
	asmr_who_ex_p = asmr_who / .mean - 1
	) \|>
	filter(date >= 2020) \|>
	select(iso3c, date, asmr_who_ex_p) \|>
	as_tibble()


	# Our World In Data
	owid <- read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")

	df_owid <- owid \|>
	select(
	iso_code, date, stringency_index, population_density,
	median_age, aged_65_older, aged_70_older, gdp_per_capita, extreme_poverty,
	cardiovasc_death_rate, diabetes_prevalence, female_smokers, male_smokers,
	hospital_beds_per_thousand, life_expectancy,
	human_development_index, people_vaccinated_per_hundred,
	total_vaccinations_per_hundred, people_vaccinated_per_hundred,
	people_fully_vaccinated_per_hundred, total_boosters_per_hundred
	) \|>
	arrange(iso_code, date) \|>
	mutate(date = year(date)) \|>
	group_by(iso_code) \|>
	fill(total_vaccinations_per_hundred, people_vaccinated_per_hundred,
	people_fully_vaccinated_per_hundred, total_boosters_per_hundred,
	.direction = "down"
	) \|>
	replace_na(list(
	total_vaccinations_per_hundred = 0,
	people_vaccinated_per_hundred = 0,
	people_fully_vaccinated_per_hundred = 0,
	total_boosters_per_hundred = 0
	)) \|>
	group_by(iso_code, date) \|>
	summarize(
	stringency_index = sum(stringency_index, na.rm = TRUE),
	population_density = mean(population_density, na.rm = TRUE),
	median_age = mean(median_age, na.rm = TRUE),
	aged_65_older = mean(aged_65_older, na.rm = TRUE),
	aged_70_older = mean(aged_70_older, na.rm = TRUE),
	gdp_per_capita = mean(gdp_per_capita, na.rm = TRUE),
	extreme_poverty = mean(extreme_poverty, na.rm = TRUE),
	cardiovasc_death_rate = mean(cardiovasc_death_rate, na.rm = TRUE),
	diabetes_prevalence = mean(diabetes_prevalence, na.rm = TRUE),
	female_smokers = mean(female_smokers, na.rm = TRUE),
	male_smokers = mean(male_smokers, na.rm = TRUE),
	hospital_beds_per_thousand = mean(hospital_beds_per_thousand, na.rm = TRUE),
	life_expectancy = mean(life_expectancy, na.rm = TRUE),
	human_development_index = mean(human_development_index, na.rm = TRUE),
	total_vaccinations_per_hundred = max(
	total_vaccinations_per_hundred,
	na.rm = TRUE
	),
	people_vaccinated_per_hundred = max(
	people_vaccinated_per_hundred,
	na.rm = TRUE
	),
	people_fully_vaccinated_per_hundred = max(
	people_fully_vaccinated_per_hundred,
	na.rm = TRUE
	),
	total_boosters_per_hundred = max(
	total_boosters_per_hundred,
	na.rm = TRUE
	),
	.groups = "drop"
	)


	# CIA World Factbook
	apify_base <- "https://apify.mortality.watch/cia-world-factbook/"
	obesity <- read_csv(paste0(apify_base, "obesity-adult-prevalence-rate.csv"))
	alcohol <- read_csv(paste0(apify_base, "alcohol-consumption-per-capita.csv"))
	tobacco <- read_csv(paste0(apify_base, "tobacco-use.csv"))

	# Add ISO3C code column to each dataset
	obesity$iso3c <- countrycode(obesity$name, "country.name", "iso3c")
	alcohol$iso3c <- countrycode(alcohol$name, "country.name", "iso3c")
	tobacco$iso3c <- countrycode(tobacco$name, "country.name", "iso3c")

	# Standardize the `%` values and add type columns
	obesity_clean <- obesity \|>
	mutate(obesity = obesity$`%` / 100) \|>
	select(iso3c, obesity)

	alcohol_clean <- alcohol \|>
	mutate(alcohol = alcohol$`liters of pure alcohol`) \|>
	select(iso3c, alcohol)

	tobacco_clean <- tobacco \|>
	mutate(tobacco = tobacco$`%` / 100) \|>
	select(iso3c, tobacco)

	df_cia <- obesity_clean \|>
	full_join(alcohol_clean, by = "iso3c") \|>
	full_join(tobacco_clean, by = "iso3c")


	# Final dataset
	df <- df_mortality \|>
	inner_join(df_owid, by = join_by(iso3c == iso_code, date)) \|>
	inner_join(df_cia, by = join_by(iso3c))