Last active
December 27, 2024 20:19
-
-
Save USMortality/cf0dc23d0d5ca609539fe11c9c089fcc to your computer and use it in GitHub Desktop.
eASMR (3y) Country Dataset including other variables
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(dplyr) | |
library(tsibble) | |
library(stringr) | |
library(fable) | |
library(tidyr) | |
library(lubridate) | |
library(countrycode) | |
options(vsc.dev.args = list(width = 600 * 2, height = 600 * 2, res = 72 * 2)) | |
# Get Excess Mortality from Mortality.Watch relative to a 3y prepandemic avg. | |
mortality <- read_csv( | |
"https://s3.mortality.watch/data/mortality/world_yearly.csv" | |
) |> | |
filter( | |
date >= 2017, | |
!is.na(asmr_who), | |
!str_starts(iso3c, "USA-"), | |
!str_starts(iso3c, "DEU-"), | |
iso3c != "LIE" | |
) |> | |
as_tsibble(index = date, key = iso3c) | |
# Simple 3y avg model. | |
model <- mortality |> | |
filter(date >= 2017 & date <= 2019) |> | |
model(lm = TSLM(asmr_who)) | |
# Get the fitted values for the pre-2020 period using augment() | |
fitted_values <- model |> | |
augment() |> | |
select(iso3c, date, .fitted) | |
# Get the forecasted values with prediction intervals for the next 4 years | |
forecasts <- model |> | |
forecast(h = "4 years", level = 95) |> | |
mutate(hl = hilo(asmr_who, level = 95)) |> | |
unpack_hilo(cols = hl) |> | |
as_tibble() |> | |
select(iso3c, date, .mean, hl_lower, hl_upper) | |
# Combine the fitted values with the original data for the pre-2020 period | |
ts_augmented <- mortality |> left_join(fitted_values, by = c("iso3c", "date")) | |
# Combine the forecast values with the dataset | |
ts_with_forecast <- ts_augmented |> | |
select(iso3c, date, asmr_who) |> | |
left_join(forecasts, by = c("iso3c", "date")) | |
df_mortality <- ts_with_forecast |> | |
mutate( | |
date = as.integer(date), | |
asmr_who_ex_p = asmr_who / .mean - 1 | |
) |> | |
filter(date >= 2020) |> | |
select(iso3c, date, asmr_who_ex_p) |> | |
as_tibble() | |
# Our World In Data | |
owid <- read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv") | |
df_owid <- owid |> | |
select( | |
iso_code, date, stringency_index, population_density, | |
median_age, aged_65_older, aged_70_older, gdp_per_capita, extreme_poverty, | |
cardiovasc_death_rate, diabetes_prevalence, female_smokers, male_smokers, | |
hospital_beds_per_thousand, life_expectancy, | |
human_development_index, people_vaccinated_per_hundred, | |
total_vaccinations_per_hundred, people_vaccinated_per_hundred, | |
people_fully_vaccinated_per_hundred, total_boosters_per_hundred | |
) |> | |
arrange(iso_code, date) |> | |
mutate(date = year(date)) |> | |
group_by(iso_code) |> | |
fill(total_vaccinations_per_hundred, people_vaccinated_per_hundred, | |
people_fully_vaccinated_per_hundred, total_boosters_per_hundred, | |
.direction = "down" | |
) |> | |
replace_na(list( | |
total_vaccinations_per_hundred = 0, | |
people_vaccinated_per_hundred = 0, | |
people_fully_vaccinated_per_hundred = 0, | |
total_boosters_per_hundred = 0 | |
)) |> | |
group_by(iso_code, date) |> | |
summarize( | |
stringency_index = sum(stringency_index, na.rm = TRUE), | |
population_density = mean(population_density, na.rm = TRUE), | |
median_age = mean(median_age, na.rm = TRUE), | |
aged_65_older = mean(aged_65_older, na.rm = TRUE), | |
aged_70_older = mean(aged_70_older, na.rm = TRUE), | |
gdp_per_capita = mean(gdp_per_capita, na.rm = TRUE), | |
extreme_poverty = mean(extreme_poverty, na.rm = TRUE), | |
cardiovasc_death_rate = mean(cardiovasc_death_rate, na.rm = TRUE), | |
diabetes_prevalence = mean(diabetes_prevalence, na.rm = TRUE), | |
female_smokers = mean(female_smokers, na.rm = TRUE), | |
male_smokers = mean(male_smokers, na.rm = TRUE), | |
hospital_beds_per_thousand = mean(hospital_beds_per_thousand, na.rm = TRUE), | |
life_expectancy = mean(life_expectancy, na.rm = TRUE), | |
human_development_index = mean(human_development_index, na.rm = TRUE), | |
total_vaccinations_per_hundred = max( | |
total_vaccinations_per_hundred, | |
na.rm = TRUE | |
), | |
people_vaccinated_per_hundred = max( | |
people_vaccinated_per_hundred, | |
na.rm = TRUE | |
), | |
people_fully_vaccinated_per_hundred = max( | |
people_fully_vaccinated_per_hundred, | |
na.rm = TRUE | |
), | |
total_boosters_per_hundred = max( | |
total_boosters_per_hundred, | |
na.rm = TRUE | |
), | |
.groups = "drop" | |
) | |
# CIA World Factbook | |
apify_base <- "https://apify.mortality.watch/cia-world-factbook/" | |
obesity <- read_csv(paste0(apify_base, "obesity-adult-prevalence-rate.csv")) | |
alcohol <- read_csv(paste0(apify_base, "alcohol-consumption-per-capita.csv")) | |
tobacco <- read_csv(paste0(apify_base, "tobacco-use.csv")) | |
# Add ISO3C code column to each dataset | |
obesity$iso3c <- countrycode(obesity$name, "country.name", "iso3c") | |
alcohol$iso3c <- countrycode(alcohol$name, "country.name", "iso3c") | |
tobacco$iso3c <- countrycode(tobacco$name, "country.name", "iso3c") | |
# Standardize the `%` values and add type columns | |
obesity_clean <- obesity |> | |
mutate(obesity = obesity$`%` / 100) |> | |
select(iso3c, obesity) | |
alcohol_clean <- alcohol |> | |
mutate(alcohol = alcohol$`liters of pure alcohol`) |> | |
select(iso3c, alcohol) | |
tobacco_clean <- tobacco |> | |
mutate(tobacco = tobacco$`%` / 100) |> | |
select(iso3c, tobacco) | |
df_cia <- obesity_clean |> | |
full_join(alcohol_clean, by = "iso3c") |> | |
full_join(tobacco_clean, by = "iso3c") | |
# Final dataset | |
df <- df_mortality |> | |
inner_join(df_owid, by = join_by(iso3c == iso_code, date)) |> | |
inner_join(df_cia, by = join_by(iso3c)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment