Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active December 27, 2024 20:19
Show Gist options
  • Save USMortality/cf0dc23d0d5ca609539fe11c9c089fcc to your computer and use it in GitHub Desktop.
Save USMortality/cf0dc23d0d5ca609539fe11c9c089fcc to your computer and use it in GitHub Desktop.
eASMR (3y) Country Dataset including other variables
library(readr)
library(dplyr)
library(tsibble)
library(stringr)
library(fable)
library(tidyr)
library(lubridate)
library(countrycode)
options(vsc.dev.args = list(width = 600 * 2, height = 600 * 2, res = 72 * 2))
# Get Excess Mortality from Mortality.Watch relative to a 3y prepandemic avg.
mortality <- read_csv(
"https://s3.mortality.watch/data/mortality/world_yearly.csv"
) |>
filter(
date >= 2017,
!is.na(asmr_who),
!str_starts(iso3c, "USA-"),
!str_starts(iso3c, "DEU-"),
iso3c != "LIE"
) |>
as_tsibble(index = date, key = iso3c)
# Simple 3y avg model.
model <- mortality |>
filter(date >= 2017 & date <= 2019) |>
model(lm = TSLM(asmr_who))
# Get the fitted values for the pre-2020 period using augment()
fitted_values <- model |>
augment() |>
select(iso3c, date, .fitted)
# Get the forecasted values with prediction intervals for the next 4 years
forecasts <- model |>
forecast(h = "4 years", level = 95) |>
mutate(hl = hilo(asmr_who, level = 95)) |>
unpack_hilo(cols = hl) |>
as_tibble() |>
select(iso3c, date, .mean, hl_lower, hl_upper)
# Combine the fitted values with the original data for the pre-2020 period
ts_augmented <- mortality |> left_join(fitted_values, by = c("iso3c", "date"))
# Combine the forecast values with the dataset
ts_with_forecast <- ts_augmented |>
select(iso3c, date, asmr_who) |>
left_join(forecasts, by = c("iso3c", "date"))
df_mortality <- ts_with_forecast |>
mutate(
date = as.integer(date),
asmr_who_ex_p = asmr_who / .mean - 1
) |>
filter(date >= 2020) |>
select(iso3c, date, asmr_who_ex_p) |>
as_tibble()
# Our World In Data
owid <- read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")
df_owid <- owid |>
select(
iso_code, date, stringency_index, population_density,
median_age, aged_65_older, aged_70_older, gdp_per_capita, extreme_poverty,
cardiovasc_death_rate, diabetes_prevalence, female_smokers, male_smokers,
hospital_beds_per_thousand, life_expectancy,
human_development_index, people_vaccinated_per_hundred,
total_vaccinations_per_hundred, people_vaccinated_per_hundred,
people_fully_vaccinated_per_hundred, total_boosters_per_hundred
) |>
arrange(iso_code, date) |>
mutate(date = year(date)) |>
group_by(iso_code) |>
fill(total_vaccinations_per_hundred, people_vaccinated_per_hundred,
people_fully_vaccinated_per_hundred, total_boosters_per_hundred,
.direction = "down"
) |>
replace_na(list(
total_vaccinations_per_hundred = 0,
people_vaccinated_per_hundred = 0,
people_fully_vaccinated_per_hundred = 0,
total_boosters_per_hundred = 0
)) |>
group_by(iso_code, date) |>
summarize(
stringency_index = sum(stringency_index, na.rm = TRUE),
population_density = mean(population_density, na.rm = TRUE),
median_age = mean(median_age, na.rm = TRUE),
aged_65_older = mean(aged_65_older, na.rm = TRUE),
aged_70_older = mean(aged_70_older, na.rm = TRUE),
gdp_per_capita = mean(gdp_per_capita, na.rm = TRUE),
extreme_poverty = mean(extreme_poverty, na.rm = TRUE),
cardiovasc_death_rate = mean(cardiovasc_death_rate, na.rm = TRUE),
diabetes_prevalence = mean(diabetes_prevalence, na.rm = TRUE),
female_smokers = mean(female_smokers, na.rm = TRUE),
male_smokers = mean(male_smokers, na.rm = TRUE),
hospital_beds_per_thousand = mean(hospital_beds_per_thousand, na.rm = TRUE),
life_expectancy = mean(life_expectancy, na.rm = TRUE),
human_development_index = mean(human_development_index, na.rm = TRUE),
total_vaccinations_per_hundred = max(
total_vaccinations_per_hundred,
na.rm = TRUE
),
people_vaccinated_per_hundred = max(
people_vaccinated_per_hundred,
na.rm = TRUE
),
people_fully_vaccinated_per_hundred = max(
people_fully_vaccinated_per_hundred,
na.rm = TRUE
),
total_boosters_per_hundred = max(
total_boosters_per_hundred,
na.rm = TRUE
),
.groups = "drop"
)
# CIA World Factbook
apify_base <- "https://apify.mortality.watch/cia-world-factbook/"
obesity <- read_csv(paste0(apify_base, "obesity-adult-prevalence-rate.csv"))
alcohol <- read_csv(paste0(apify_base, "alcohol-consumption-per-capita.csv"))
tobacco <- read_csv(paste0(apify_base, "tobacco-use.csv"))
# Add ISO3C code column to each dataset
obesity$iso3c <- countrycode(obesity$name, "country.name", "iso3c")
alcohol$iso3c <- countrycode(alcohol$name, "country.name", "iso3c")
tobacco$iso3c <- countrycode(tobacco$name, "country.name", "iso3c")
# Standardize the `%` values and add type columns
obesity_clean <- obesity |>
mutate(obesity = obesity$`%` / 100) |>
select(iso3c, obesity)
alcohol_clean <- alcohol |>
mutate(alcohol = alcohol$`liters of pure alcohol`) |>
select(iso3c, alcohol)
tobacco_clean <- tobacco |>
mutate(tobacco = tobacco$`%` / 100) |>
select(iso3c, tobacco)
df_cia <- obesity_clean |>
full_join(alcohol_clean, by = "iso3c") |>
full_join(tobacco_clean, by = "iso3c")
# Final dataset
df <- df_mortality |>
inner_join(df_owid, by = join_by(iso3c == iso_code, date)) |>
inner_join(df_cia, by = join_by(iso3c))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment