Created
June 7, 2021 20:22
-
-
Save jakeybob/902ff979cd8b7c5cabd8b0628a2b70ff to your computer and use it in GitHub Desktop.
Scotland standardised age/sex interpolated populations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(janitor) | |
library(lubridate) | |
library(zoo) | |
library(ckanr) | |
# generates age/sex population lookup for Scotland based on NRS mid-year | |
# population estimates/projections, interpolated linearly to day level | |
# (July 2nd used as mid-year reference date). | |
# | |
# output is aggregated to age band and sex level, with data being | |
# a) population of that age/sex breakdown on each date, | |
# b) national population on each date, and | |
# c) standardised age/sex population on each date (standardised to most recent year in the NRS estimates opendata) | |
# SETUP #### | |
# output file will be clipped to these dates | |
min_date <- dmy("01/06/2013") | |
max_date <- dmy("01/01/2023") | |
ckanr_setup(url = "https://www.opendata.nhs.scot/") | |
res_est <- resource_show(id = "27a72cc8-d6d8-430c-8b4f-3109a9ceadb1") # pop estimates | |
res_proj <- resource_show(id = "0876fc67-05e6-4e87-bc30-c4b0756fff04") # pop projections | |
# GET DATA #### | |
# national population estimates (retrospective) | |
data_est <- ckan_fetch(x=res_est$url) %>% | |
clean_names() %>% | |
filter(hb == "S92000003", | |
sex != "All") %>% | |
select(year, sex, contains("age"), -contains("ages")) | |
# national population projections (for years with no estimates available) | |
data_proj <- ckan_fetch(x=res_proj$url) %>% | |
clean_names() %>% | |
filter(hb == "S92000003", | |
sex != "All") %>% | |
select(year, sex, contains("age"), -contains("ages")) %>% | |
filter(year %in% unique(data_est$year) == FALSE) | |
# combine and format estimate and projections | |
data <- data_est %>% | |
bind_rows(data_proj) %>% | |
distinct() %>% | |
mutate(sex = tolower(sex)) %>% | |
arrange(year, sex) %>% | |
pivot_longer(starts_with("age"), names_to = "age", values_to = "pop") %>% | |
mutate(age = as.integer(str_extract(age, "\\d+"))) %>% | |
mutate(date = dmy(paste0("02/07/", year))) %>% | |
select(-year) %>% | |
mutate(age_band = case_when(age < 20 ~ "<20", | |
age <= 29 ~ "20-29", | |
age <= 39 ~ "30-39", | |
age <= 49 ~ "40-49", | |
age <= 59 ~ "50-59", | |
age <= 69 ~ "60-69", | |
age <= 79 ~ "70-79", | |
age >= 80 ~ "80+", | |
TRUE ~ NA_character_)) %>% | |
group_by(sex, date, age_band) %>% | |
summarise(pop = sum(pop)) %>% | |
group_by(date) %>% | |
mutate(pop_scot = sum(pop)) | |
# STANDARDISE #### | |
# use most recent NRS population estimate (i.e. not projection) as the year to standardise to | |
standard_pop <- data %>% | |
ungroup() %>% | |
filter(date == dmy(paste0("02/07/", max(data_est$year)))) %>% | |
mutate(factor = pop/sum(pop)) %>% | |
select(-pop) | |
data <- data %>% | |
left_join(standard_pop %>% select(-date, -pop_scot)) %>% | |
mutate(pop_std = factor*pop_scot) %>% select(-factor) %>% ungroup() | |
# create df of all date/sex/age combinations, join on the population data, | |
# then linearly interpolate between all the mid-year points | |
df <- crossing(date = seq.Date(from = dmy(paste0("02/07/", min(data_est$year, data_proj$year))), | |
to = dmy(paste0("02/07/", max(data_est$year, data_proj$year))), | |
by = "day"), | |
sex = unique(data$sex), | |
age_band = unique(data$age_band)) %>% | |
left_join(data) %>% | |
group_by(sex, age_band) %>% | |
arrange(sex, age_band, date) %>% | |
mutate(across(starts_with("pop"), ~na.approx(.))) %>% | |
ungroup() %>% | |
select(date, sex, age_band, starts_with("pop")) | |
# OUTPUT #### | |
write_rds(df %>% filter(date >= min_date, date <= max_date), | |
"std_pop_daily.rds", compress = "gz") | |
# # quick plot test | |
# df %>% | |
# filter(date >= min_date, date <= max_date) %>% | |
# ggplot(aes(x = date, y = pop_std, colour = sex)) + | |
# geom_line() + | |
# facet_wrap(~age_band) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment