Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Created March 9, 2022 08:34
Show Gist options
  • Save thoughtfulbloke/5f6244419918ae13146ca91a68b429df to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/5f6244419918ae13146ca91a68b429df to your computer and use it in GitHub Desktop.
library(rvest)
library(readr)
library(dplyr)
library(tidyr)
library(lubridate)
#######
# Assumes the DHB subnational population file has been downloaded from
# https://figure.nz/table/vEnTmdKKixC0HrEF
# and saved in the working directory as
# Population_Estimated_population_by_sex_age_group_and_DHB_at_June_19962021.csv
######
# Population denominators: one row per DHB and ten-year age band, with the
# 2021 estimate for both sexes combined summed into `population`.
dhbs_2021 <- read_csv(
  "Population_Estimated_population_by_sex_age_group_and_DHB_at_June_19962021.csv"
) %>%
  filter(Sex == "Total", `Year as at 30 June` == 2021) %>%
  mutate(
    # Collapse the five-year source groups into ten-year bands. Any age group
    # not listed here becomes NA and is removed by the filter below.
    Age = case_when(
      `Age group` %in% c("0-4", "5-9") ~ "0 to 9",
      `Age group` %in% c("10-14", "15-19") ~ "10 to 19",
      `Age group` %in% c("20-24", "25-29") ~ "20 to 29",
      `Age group` %in% c("30-34", "35-39") ~ "30 to 39",
      `Age group` %in% c("40-44", "45-49") ~ "40 to 49",
      `Age group` %in% c("50-54", "55-59") ~ "50 to 59",
      `Age group` %in% c("60-64", "65-69") ~ "60 to 69",
      `Age group` %in% c("70-74", "75-79") ~ "70 to 79",
      `Age group` %in% c("80-84", "85-89") ~ "80 to 89",
      `Age group` == "90-*" ~ "90+"
    ),
    # Respell four DHB names (presumably to match the spelling used in the
    # case data -- confirm); every other name passes through unchanged.
    DHB = case_when(
      `District health board` == "Capital & Coast" ~ "Capital and Coast",
      `District health board` == "Hutt" ~ "Hutt Valley",
      `District health board` == "Tairāwhiti" ~ "Tairawhiti",
      `District health board` == "Waitematā" ~ "Waitemata",
      TRUE ~ `District health board`
    )
  ) %>%
  # Drop unmapped age groups and the national total row.
  filter(DHB != "New Zealand", !is.na(Age)) %>%
  group_by(DHB, Age) %>%
  summarise(population = sum(Value), .groups = "drop")
# Get the latest all-cases file: collect every link on the case demographics
# page and keep the one that points at a CSV download.
lnks <- "https://www.health.govt.nz/covid-19-novel-coronavirus/covid-19-data-and-statistics/covid-19-case-demographics" %>%
  read_html() %>%
  html_nodes("a") %>%
  html_attr("href")
# Anchors without an href come back as NA; grep(value = TRUE) skips them.
csv_lnk <- unique(grep("csv$", lnks, value = TRUE))
# Fail early when nothing matches: paste0() would otherwise turn the empty
# result into the bare site root, which would then be read as if it were
# the CSV.
if (length(csv_lnk) == 0) {
  stop(
    "No link ending in 'csv' was found on the case demographics page.",
    call. = FALSE
  )
}
# More than one distinct CSV link is unexpected; use the first so that
# csv_lnk is always a single URL.
if (length(csv_lnk) > 1) {
  warning(
    "Found ", length(csv_lnk), " CSV links; using the first: ", csv_lnk[1],
    call. = FALSE
  )
  csv_lnk <- csv_lnk[1]
}
# Site-relative paths need the host prepended; leave absolute URLs alone.
if (!grepl("^https?://", csv_lnk)) {
  csv_lnk <- paste0("https://www.health.govt.nz", csv_lnk)
}
# Daily case counts per DHB and age group, read from the CSV at `csv_lnk`.
# Every column is read as text except `Report Date`, which is parsed as a date.
NZ_cases <- read_csv(
  csv_lnk,
  col_types = cols(
    .default = col_character(),
    `Report Date` = col_date(format = "")
  )
) %>%
  filter(
    # Keep only rows where `Historical` is empty.
    is.na(Historical),
    # Drop cases not attributed to a DHB.
    DHB != "Unknown",
    DHB != "Managed Isolation & Quarantine",
    # you may want to change the filter date
    `Report Date` > ymd("2022-01-22")
  ) %>%
  # One row per DHB / age group / report date, with the number of cases.
  count(DHB, Age = `Age group`, Date = `Report Date`, name = "Cases")
## Add zeros where days are missing, then accumulate cases as a percentage
## of each DHB / age band population.
max_NZ_date <- max(NZ_cases$Date)
NZ_zerod <- expand_grid(
  DHB = unique(NZ_cases$DHB),
  Age = unique(NZ_cases$Age),
  Date = unique(NZ_cases$Date)
) %>%
  # Attach the observed counts to the full DHB x Age x Date grid; any
  # combination with no reported cases comes back as NA.
  left_join(NZ_cases, by = c("DHB", "Age", "Date")) %>%
  mutate(Cases = if_else(is.na(Cases), 0, as.numeric(Cases))) %>%
  # Only DHB / age combinations that have a population denominator are kept.
  inner_join(dhbs_2021, by = c("DHB", "Age")) %>%
  arrange(DHB, Age, Date) %>%
  group_by(DHB, Age) %>%
  # `percent` is the running total over dates of daily cases as a
  # percentage of the group's population.
  mutate(percent = cumsum(100 * Cases / population)) %>%
  ungroup()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment