Created
September 17, 2021 04:07
-
-
Save thoughtfulbloke/004395165fad7bc39378c1335102c3f5 to your computer and use it in GitHub Desktop.
calculating weekly mortality rates from the OECD data source
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# OECD weekly all cause deaths to mortality rates | |
# Keep in mind, some countries (Sweden) exclude 3% of deaths from the weekly | |
# dataset as the day of death is not known | |
library(OECD) | |
library(dplyr) | |
country1 = "NZL" | |
country2 = "USA" | |
#Getting population if not already saved | |
popfile = paste0(country1, country2, "pop.csv") | |
if(!file.exists(popfile)){ | |
dataset <- "QNA" | |
filter_list <- list(c(country1, country2), "POPNC", "PER","Q") | |
df <- get_dataset(dataset = dataset, filter = filter_list) %>% | |
select(LOCATION, obsTime, obsValue) | |
write.csv(df,file=popfile, row.names = FALSE) | |
} | |
#Getting weekly deaths if not already saved | |
mortfile = paste0(country1, country2, "mort.csv") | |
if(!file.exists(mortfile)){ | |
dataset <- "HEALTH_MORTALITY" | |
filter_preformat <- paste0(country1, "+", country2,".1+2+3+4+5+6+7+8+9+10+11+12+13+14+15+16+17+18+19+20+21+22+23+24+25+26+27+28+29+30+31+32+33+34+35+36+37+38+39+40+41+42+43+44+45+46+47+48+49+50+51+52+53.TOTAL.TOTAL.ALLCAUNB") | |
df <- get_dataset("HEALTH_MORTALITY", | |
filter = filter_preformat, | |
start_time = 2015, end_time = 2021, pre_formatted = TRUE) %>% | |
select(COUNTRY, WEEK, obsTime, obsValue) | |
write.csv(df,file=mortfile, row.names = FALSE) | |
} | |
# Note, at a fine level of detail "weeks" have different start and end days, | |
# so for any particular country compare how many weeks in each year, and | |
# compare it to the number of each weekdays in each year to find out on | |
# which day of the week the week is based. | |
# see https://twitter.com/Thoughtfulnz/status/1438002243626684416?s=20 | |
# for example: USA week ending Wed, SWE Thu, NZL Sun | |
#usage example | |
# already loaded dplyr above | |
library(tidyr) | |
library(lubridate) | |
mort <- read.csv("NZLUSAmort.csv", stringsAsFactors = FALSE) | |
pop <- read.csv("NZLUSApop.csv", stringsAsFactors = FALSE) | |
# I'm going to use the quarter as the base population for the mortality of the | |
# following quarter as that gives me a population for recent mortality entries | |
pop <- pop %>% | |
separate(obsTime, into=c("Year", "Quarter"), sep = "-Q", convert = TRUE) %>% | |
mutate(adjusted_date = ISOdate(Year, Quarter * 3 - 1,1) + days(92), | |
Year = year(adjusted_date), | |
Quarter = quarter(adjusted_date)) %>% | |
select(COUNTRY = LOCATION, Year, Quarter, populationK = obsValue) | |
# work through the dates of the weeks from the last day of the week the | |
# country's weeks is based on in 2014. Then generate the quarter to merge the | |
# data together | |
# dtseq <- seq.Date(as.Date("2014-12-25"),as.Date("2014-12-31"),"days") | |
# paste(dtseq, wday(dtseq, label=TRUE)) | |
# "2014-12-25 Thu" "2014-12-26 Fri" "2014-12-27 Sat" | |
# "2014-12-28 Sun" "2014-12-29 Mon" "2014-12-30 Tue" "2014-12-31 Wed" | |
combo <- mort %>% | |
arrange(COUNTRY, obsTime, WEEK) %>% | |
group_by(COUNTRY) %>% | |
mutate(weeks_since_2014 = row_number()) %>% | |
ungroup() %>% | |
mutate(week_ending = case_when(COUNTRY == country1 ~ as.Date("2014-12-28") + days(weeks_since_2014*7), | |
COUNTRY == country2 ~ as.Date("2014-12-31") + days(weeks_since_2014*7)), | |
Quarter = quarter(week_ending)) %>% | |
select(COUNTRY, Year = obsTime, Quarter, week_ending, Deaths = obsValue) %>% | |
inner_join(pop, by = c("COUNTRY", "Year", "Quarter")) %>% | |
mutate(Deaths_per_100K = 100*Deaths/populationK) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment