thoughtfulbloke · September 17, 2021 04:07
diff --git a/OECDmortRate.R b/OECDmortRate.R
 # OECD weekly all cause deaths to mortality rates
 # Keep in mind, some countries (Sweden) exclude 3% of deaths from the weekly 
 # dataset as the day of death is not known

 library(OECD)
 library(dplyr)

 country1 = "NZL"
 country2 = "USA"

 #Getting population if not already saved
 popfile = paste0(country1, country2, "pop.csv")
 if(!file.exists(popfile)){
  dataset <- "QNA"
  filter_list <- list(c(country1, country2), "POPNC", "PER","Q")
  df <- get_dataset(dataset = dataset, filter = filter_list) %>% 
    select(LOCATION, obsTime, obsValue)
  write.csv(df,file=popfile, row.names = FALSE)
 }

 #Getting weekly deaths if not already saved
 mortfile = paste0(country1, country2, "mort.csv")
 if(!file.exists(mortfile)){
  dataset <- "HEALTH_MORTALITY"
  filter_preformat <- paste0(country1, "+", country2,".1+2+3+4+5+6+7+8+9+10+11+12+13+14+15+16+17+18+19+20+21+22+23+24+25+26+27+28+29+30+31+32+33+34+35+36+37+38+39+40+41+42+43+44+45+46+47+48+49+50+51+52+53.TOTAL.TOTAL.ALLCAUNB")
  df <- get_dataset("HEALTH_MORTALITY", 
              filter = filter_preformat,
              start_time = 2015, end_time = 2021, pre_formatted = TRUE) %>%
    select(COUNTRY, WEEK, obsTime, obsValue)
  write.csv(df,file=mortfile, row.names = FALSE)
 }

 # Note, at a fine level of detail "weeks" have different start and end days,
 # so for any particular country compare how many weeks in each year, and
 # compare it to the number of each weekdays in each year to find out on
 # which day of the week the week is based.
 # see https://twitter.com/Thoughtfulnz/status/1438002243626684416?s=20
 # for example: USA week ending Wed, SWE Thu, NZL Sun

 #usage example

 # already loaded dplyr above
 library(tidyr)
 library(lubridate)


 mort <- read.csv("NZLUSAmort.csv", stringsAsFactors = FALSE)
 pop <- read.csv("NZLUSApop.csv", stringsAsFactors = FALSE)
 # I'm going to use the quarter as the base population for the mortality of the
 # following quarter as that gives me a population for recent mortality entries

 pop <- pop %>%
  separate(obsTime, into=c("Year", "Quarter"), sep = "-Q", convert = TRUE) %>%
  mutate(adjusted_date = ISOdate(Year, Quarter * 3 - 1,1) + days(92),
         Year = year(adjusted_date),
         Quarter = quarter(adjusted_date)) %>% 
  select(COUNTRY = LOCATION, Year, Quarter, populationK = obsValue)

 # work through the dates of the weeks from the last day of the week the
 # country's weeks is based on in 2014. Then generate the quarter to merge the
 # data together
 # dtseq <- seq.Date(as.Date("2014-12-25"),as.Date("2014-12-31"),"days")
 # paste(dtseq, wday(dtseq, label=TRUE))
 # "2014-12-25 Thu" "2014-12-26 Fri" "2014-12-27 Sat"
 # "2014-12-28 Sun" "2014-12-29 Mon" "2014-12-30 Tue" "2014-12-31 Wed"
 combo <- mort %>%
  arrange(COUNTRY, obsTime, WEEK) %>%
  group_by(COUNTRY) %>%
  mutate(weeks_since_2014 = row_number()) %>%
  ungroup() %>%
  mutate(week_ending = case_when(COUNTRY == country1 ~ as.Date("2014-12-28") + days(weeks_since_2014*7),
                                 COUNTRY == country2 ~ as.Date("2014-12-31") + days(weeks_since_2014*7)),
         Quarter = quarter(week_ending)) %>%
  select(COUNTRY, Year = obsTime, Quarter, week_ending, Deaths = obsValue) %>%
  inner_join(pop, by = c("COUNTRY", "Year", "Quarter")) %>%
  mutate(Deaths_per_100K = 100*Deaths/populationK)
	# OECD weekly all cause deaths to mortality rates
	# Keep in mind, some countries (Sweden) exclude 3% of deaths from the weekly
	# dataset as the day of death is not known

	library(OECD)
	library(dplyr)

	country1 = "NZL"
	country2 = "USA"

	#Getting population if not already saved
	popfile = paste0(country1, country2, "pop.csv")
	if(!file.exists(popfile)){
	dataset <- "QNA"
	filter_list <- list(c(country1, country2), "POPNC", "PER","Q")
	df <- get_dataset(dataset = dataset, filter = filter_list) %>%
	select(LOCATION, obsTime, obsValue)
	write.csv(df,file=popfile, row.names = FALSE)
	}

	#Getting weekly deaths if not already saved
	mortfile = paste0(country1, country2, "mort.csv")
	if(!file.exists(mortfile)){
	dataset <- "HEALTH_MORTALITY"
	filter_preformat <- paste0(country1, "+", country2,".1+2+3+4+5+6+7+8+9+10+11+12+13+14+15+16+17+18+19+20+21+22+23+24+25+26+27+28+29+30+31+32+33+34+35+36+37+38+39+40+41+42+43+44+45+46+47+48+49+50+51+52+53.TOTAL.TOTAL.ALLCAUNB")
	df <- get_dataset("HEALTH_MORTALITY",
	filter = filter_preformat,
	start_time = 2015, end_time = 2021, pre_formatted = TRUE) %>%
	select(COUNTRY, WEEK, obsTime, obsValue)
	write.csv(df,file=mortfile, row.names = FALSE)
	}

	# Note, at a fine level of detail "weeks" have different start and end days,
	# so for any particular country compare how many weeks in each year, and
	# compare it to the number of each weekdays in each year to find out on
	# which day of the week the week is based.
	# see https://twitter.com/Thoughtfulnz/status/1438002243626684416?s=20
	# for example: USA week ending Wed, SWE Thu, NZL Sun

	#usage example

	# already loaded dplyr above
	library(tidyr)
	library(lubridate)


	mort <- read.csv("NZLUSAmort.csv", stringsAsFactors = FALSE)
	pop <- read.csv("NZLUSApop.csv", stringsAsFactors = FALSE)
	# I'm going to use the quarter as the base population for the mortality of the
	# following quarter as that gives me a population for recent mortality entries

	pop <- pop %>%
	separate(obsTime, into=c("Year", "Quarter"), sep = "-Q", convert = TRUE) %>%
	mutate(adjusted_date = ISOdate(Year, Quarter * 3 - 1,1) + days(92),
	Year = year(adjusted_date),
	Quarter = quarter(adjusted_date)) %>%
	select(COUNTRY = LOCATION, Year, Quarter, populationK = obsValue)

	# work through the dates of the weeks from the last day of the week the
	# country's weeks is based on in 2014. Then generate the quarter to merge the
	# data together
	# dtseq <- seq.Date(as.Date("2014-12-25"),as.Date("2014-12-31"),"days")
	# paste(dtseq, wday(dtseq, label=TRUE))
	# "2014-12-25 Thu" "2014-12-26 Fri" "2014-12-27 Sat"
	# "2014-12-28 Sun" "2014-12-29 Mon" "2014-12-30 Tue" "2014-12-31 Wed"
	combo <- mort %>%
	arrange(COUNTRY, obsTime, WEEK) %>%
	group_by(COUNTRY) %>%
	mutate(weeks_since_2014 = row_number()) %>%
	ungroup() %>%
	mutate(week_ending = case_when(COUNTRY == country1 ~ as.Date("2014-12-28") + days(weeks_since_2014*7),
	COUNTRY == country2 ~ as.Date("2014-12-31") + days(weeks_since_2014*7)),
	Quarter = quarter(week_ending)) %>%
	select(COUNTRY, Year = obsTime, Quarter, week_ending, Deaths = obsValue) %>%
	inner_join(pop, by = c("COUNTRY", "Year", "Quarter")) %>%
	mutate(Deaths_per_100K = 100*Deaths/populationK)