Skip to content

Instantly share code, notes, and snippets.

@MattCowgill
Created May 21, 2023 10:22
Show Gist options
  • Save MattCowgill/a601d10e650f44510ecffb14715475c2 to your computer and use it in GitHub Desktop.
Save MattCowgill/a601d10e650f44510ecffb14715475c2 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(readxl)
library(lubridate)
#' Download one month's "Insights from the original data" workbook
#'
#' Builds the ABS Labour Force release URL for `date` (lower-case English
#' month abbreviation plus four-digit year, e.g. "apr-2023"), downloads the
#' workbook to a temporary file and returns that file's path.
#'
#' @param date A `Date` identifying the release month. Dates on or before
#'   2021-03-01 are requested with a ".xls" extension, later dates with
#'   ".xlsx".
#' @return Path of the temporary file the workbook was written to.
dl_file <- function(date) {
  # `format(date, "%b")` follows the session's LC_TIME locale, so a
  # non-English locale would yield a month name that does not match the URL
  # scheme. `%m` and `%Y` are numeric and `month.abb` is always English.
  month_year <- paste0(
    tolower(month.abb[as.integer(format(date, "%m"))]),
    "-",
    format(date, "%Y")
  )

  file_format <- ifelse(date <= as.Date("2021-03-01"), ".xls", ".xlsx")

  url <- paste0(
    "https://www.abs.gov.au/statistics/labour/employment-and-unemployment/labour-force-australia/",
    month_year,
    "/Insights%20from%20the%20original%20data",
    file_format
  )

  path <- tempfile(fileext = file_format)

  # download.file() reports success as status 0; some methods return a
  # non-zero status instead of raising an error, so check it explicitly.
  status <- download.file(url, path, mode = "wb")
  if (!identical(as.integer(status), 0L)) {
    stop("Download failed (status ", status, ") for ", url, call. = FALSE)
  }

  path
}
#' Read one data sheet of a workbook into a long tibble
#'
#' Reads the cell block `A6:D8` (three rows, four columns) and the table title
#' in cell `A4`. Columns 1-2 of the block are taken as (sample, value) for the
#' first month named in the title and columns 3-4 as (sample, value) for the
#' second month. The title is parsed by dropping everything up to the last
#' ".", splitting the remainder on "," into metric / months / series type, and
#' splitting the months on " - ".
#'
#' @param path Path to the workbook.
#' @param sheet Sheet to read; passed to `read_excel()`.
#' @return A tibble with columns `sample`, `value`, `metric` and `month`,
#'   holding the rows for the first month followed by those for the second.
import_sheet <- function(path, sheet) {
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  raw_df <- read_excel(path, sheet = sheet, range = "A6:D8", col_names = FALSE,
                       .name_repair = "minimal")

  table_title <- read_excel(path, sheet = sheet, range = "A4",
                            col_names = c("raw_title"),
                            .name_repair = "minimal")

  # One row per month ("month_1", "month_2") carrying the metric name and the
  # month label extracted from the title.
  metadata <- table_title |>
    mutate(title = str_remove_all(raw_title, ".*\\.")) |>
    separate(title, into = c("metric", "months", "series_type"), sep = ",") |>
    separate(months, into = c("month_1", "month_2"), sep = " - ") |>
    mutate(across(everything(), str_squish)) |>
    select(metric, month_1, month_2) |>
    pivot_longer(cols = starts_with("month"),
                 names_to = "month_type",
                 values_to = "month")

  # Columns are selected by position because the block is read without names.
  month_1 <- raw_df |>
    select(sample = 1,
           value = 2) |>
    mutate(month_type = "month_1")

  month_2 <- raw_df |>
    select(sample = 3,
           value = 4) |>
    mutate(month_type = "month_2")

  dat <- bind_rows(month_1, month_2)

  # Attach metric and month to every value, then drop the join key.
  dat |>
    left_join(metadata, by = "month_type") |>
    select(-month_type)
}
#' Import all data sheets of one workbook
#'
#' Calls `import_sheet()` for the sheets "Data1", "Data2" and "Data3" and
#' row-binds the results.
#'
#' @param path Path to the workbook.
#' @return The row-bound output of `import_sheet()` for the three sheets.
import_file <- function(path) {
  sheet_names <- c("Data1", "Data2", "Data3")
  map_dfr(sheet_names, \(sheet_name) import_sheet(path, sheet = sheet_name))
}
# Download one workbook per month, stepping backwards from April 2023 to
# June 2020; `files` holds the local paths in that order.
files <- map_chr(
  seq.Date(from = ymd("2023-04-01"), to = ymd("2020-06-01"), by = "-1 month"),
  dl_file,
  .progress = TRUE
)

# Import every workbook and row-bind the results, then replace the textual
# `month` column with a `date` column set to the first day of that month.
raw_data <- files |>
  map_dfr(import_file, .progress = TRUE) |>
  mutate(date = dmy(paste0("01 ", month))) |>
  select(!month)
# Unemployment rate (unemployed / (unemployed + employed)) for each date and
# rotation group, keeping only the incoming and outgoing rotation groups.
urs <- raw_data |>
  filter(sample %in% c("Incoming rotation group",
                       "Outgoing rotation group")) |>
  group_by(date, sample) |>
  summarise(ur = {
    unemployed <- value[metric == "Unemployed"]
    employed <- value[metric == "Employed"]
    unemployed / (unemployed + employed)
  }) |>
  ungroup()
# Column chart of, for each month, the incoming rotation group's unemployment
# rate minus the previous month's outgoing rotation group's rate.
urs |>
  pivot_wider(names_from = sample,
              values_from = ur) |>
  # lag() pairs rows purely by position, so put the rows in chronological
  # order explicitly instead of relying on the order pivot_wider() returns.
  arrange(date) |>
  mutate(`Outgoing rotation group` = lag(`Outgoing rotation group`),
         diff = `Incoming rotation group` - `Outgoing rotation group`) |>
  ggplot(aes(x = date, y = diff)) +
  geom_hline(yintercept = 0) +
  geom_col() +
  # Rates are proportions, so multiply by 100 to label in percentage points.
  scale_y_continuous(labels = \(x) paste0(x * 100, " ppts")) +
  # Yearly breaks counted back from the latest date in the data.
  scale_x_date(breaks = seq(max(urs$date),
                            min(urs$date),
                            "-1 year"),
               date_labels = "%b\n%Y") +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        axis.title = element_blank()) +
  labs(subtitle = "Difference between unemployment rate of incoming rotation group\nand of previous month's outgoing rotation group")
@MattCowgill
Copy link
Author

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment