Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Created September 17, 2021 04:07
Show Gist options
  • Save thoughtfulbloke/004395165fad7bc39378c1335102c3f5 to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/004395165fad7bc39378c1335102c3f5 to your computer and use it in GitHub Desktop.
calculating weekly mortality rates from the OECD data source
# OECD weekly all cause deaths to mortality rates
# Keep in mind, some countries (Sweden) exclude 3% of deaths from the weekly
# dataset as the day of death is not known
library(OECD)
library(dplyr)
country1 = "NZL"
country2 = "USA"
#Getting population if not already saved
popfile = paste0(country1, country2, "pop.csv")
if(!file.exists(popfile)){
dataset <- "QNA"
filter_list <- list(c(country1, country2), "POPNC", "PER","Q")
df <- get_dataset(dataset = dataset, filter = filter_list) %>%
select(LOCATION, obsTime, obsValue)
write.csv(df,file=popfile, row.names = FALSE)
}
#Getting weekly deaths if not already saved
mortfile = paste0(country1, country2, "mort.csv")
if(!file.exists(mortfile)){
dataset <- "HEALTH_MORTALITY"
filter_preformat <- paste0(country1, "+", country2,".1+2+3+4+5+6+7+8+9+10+11+12+13+14+15+16+17+18+19+20+21+22+23+24+25+26+27+28+29+30+31+32+33+34+35+36+37+38+39+40+41+42+43+44+45+46+47+48+49+50+51+52+53.TOTAL.TOTAL.ALLCAUNB")
df <- get_dataset("HEALTH_MORTALITY",
filter = filter_preformat,
start_time = 2015, end_time = 2021, pre_formatted = TRUE) %>%
select(COUNTRY, WEEK, obsTime, obsValue)
write.csv(df,file=mortfile, row.names = FALSE)
}
# Note, at a fine level of detail "weeks" have different start and end days,
# so for any particular country compare how many weeks in each year, and
# compare it to the number of each weekdays in each year to find out on
# which day of the week the week is based.
# see https://twitter.com/Thoughtfulnz/status/1438002243626684416?s=20
# for example: USA week ending Wed, SWE Thu, NZL Sun
#usage example
# already loaded dplyr above
library(tidyr)
library(lubridate)
mort <- read.csv("NZLUSAmort.csv", stringsAsFactors = FALSE)
pop <- read.csv("NZLUSApop.csv", stringsAsFactors = FALSE)
# I'm going to use the quarter as the base population for the mortality of the
# following quarter as that gives me a population for recent mortality entries
pop <- pop %>%
separate(obsTime, into=c("Year", "Quarter"), sep = "-Q", convert = TRUE) %>%
mutate(adjusted_date = ISOdate(Year, Quarter * 3 - 1,1) + days(92),
Year = year(adjusted_date),
Quarter = quarter(adjusted_date)) %>%
select(COUNTRY = LOCATION, Year, Quarter, populationK = obsValue)
# work through the dates of the weeks from the last day of the week the
# country's weeks is based on in 2014. Then generate the quarter to merge the
# data together
# dtseq <- seq.Date(as.Date("2014-12-25"),as.Date("2014-12-31"),"days")
# paste(dtseq, wday(dtseq, label=TRUE))
# "2014-12-25 Thu" "2014-12-26 Fri" "2014-12-27 Sat"
# "2014-12-28 Sun" "2014-12-29 Mon" "2014-12-30 Tue" "2014-12-31 Wed"
combo <- mort %>%
arrange(COUNTRY, obsTime, WEEK) %>%
group_by(COUNTRY) %>%
mutate(weeks_since_2014 = row_number()) %>%
ungroup() %>%
mutate(week_ending = case_when(COUNTRY == country1 ~ as.Date("2014-12-28") + days(weeks_since_2014*7),
COUNTRY == country2 ~ as.Date("2014-12-31") + days(weeks_since_2014*7)),
Quarter = quarter(week_ending)) %>%
select(COUNTRY, Year = obsTime, Quarter, week_ending, Deaths = obsValue) %>%
inner_join(pop, by = c("COUNTRY", "Year", "Quarter")) %>%
mutate(Deaths_per_100K = 100*Deaths/populationK)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment