Created
June 5, 2021 19:54
-
-
Save jakeybob/93e7bf0600cba83475146099e3ccf102 to your computer and use it in GitHub Desktop.
Scotland age/sex interpolated population estimates/projections
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(janitor) | |
library(lubridate) | |
library(zoo) | |
library(ckanr) | |
# generates age/sex population lookup for Scotland based on NRS mid-year | |
# population estimates/projections, interpolated linearly to day level | |
# (July 2nd used as mid-year ref date) | |
# NOTE: oldest age group will be composite e.g. "90+" | |
ckanr_setup(url = "https://www.opendata.nhs.scot/") | |
res_est <- resource_show(id = "27a72cc8-d6d8-430c-8b4f-3109a9ceadb1") # pop estimates | |
res_proj <- resource_show(id = "0876fc67-05e6-4e87-bc30-c4b0756fff04") # pop projections | |
# national population estimates (retrospective) | |
data_est <- ckan_fetch(x=res_est$url) %>% | |
clean_names() %>% | |
filter(hb == "S92000003", | |
sex != "All") %>% | |
select(year, sex, contains("age"), -contains("ages")) | |
# national population projections (for years with no estimates available) | |
data_proj <- ckan_fetch(x=res_proj$url) %>% | |
clean_names() %>% | |
filter(hb == "S92000003", | |
sex != "All") %>% | |
select(year, sex, contains("age"), -contains("ages")) %>% | |
filter(year %in% unique(data_est$year) == FALSE) | |
# combine and format estimate and projections | |
data <- data_est %>% | |
bind_rows(data_proj) %>% | |
distinct() %>% | |
mutate(sex = tolower(sex)) %>% | |
arrange(year, sex) %>% | |
pivot_longer(starts_with("age"), names_to = "age", values_to = "pop") %>% | |
mutate(age_desc = str_remove(age, "age")) %>% | |
mutate(age = as.integer(str_extract(age, "\\d+"))) %>% | |
mutate(date = dmy(paste0("02/07/", year))) %>% | |
select(-year) | |
# create df of all date/sex/age combinations, join on the population data, | |
# then linearly interpolate between all the mid-year points | |
df <- crossing(date = seq.Date(from = dmy(paste0("02/07/", min(data_est$year, data_proj$year))), | |
to = dmy(paste0("02/07/", max(data_est$year, data_proj$year))), | |
by = "day"), | |
sex = unique(data$sex), | |
age = unique(data$age)) %>% | |
left_join(data) %>% | |
group_by(sex, age) %>% | |
mutate(pop = na.approx(pop)) %>% | |
ungroup() %>% | |
select(date, sex, age, age_desc, pop) | |
# output data | |
write_rds(df %>% filter(date >= dmy("01/01/2015"), date <= dmy("01/01/2023")), | |
"scot_pop_daily.rds", compress = "gz") | |
# quick plot test | |
df %>% | |
filter(date >= dmy("01/01/2015"), date <= today()) %>% | |
filter(age %in% c(10:25)) %>% | |
# filter(age > 80) %>% | |
ggplot(aes(x = date, y = pop, colour = sex)) + | |
geom_line() + | |
facet_wrap(~age) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment