Created
November 10, 2022 14:03
-
-
Save jkr216/49a41a3f26ec34a009c0909fd23a94e5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Packages and setup | |
```{r setup, include=FALSE} | |
library(tidyverse) # the fundamentals | |
library(lubridate) # working with dates | |
library(timetk) # time series Swiss Army Knife, not used today but worth a look | |
library(tidyquant) # great finance functions and FRED importer | |
library(readxl) | |
library(scales) | |
library(fredr) # meta data on Fred data sets | |
library(janitor) # clean column names | |
library(gt) | |
library(gtExtras) | |
library(downloadthis) # create downloaders | |
# fred_api_key <- Sys.getenv("FRED_API_KEY") | |
knitr::opts_chunk$set(echo = TRUE) | |
``` | |
### Import and Wrangle the Data | |
Our first goal is to import data on relative importance or weights of CPI components. | |
That data is available here: | |
https://www.bls.gov/cpi/tables/relative-importance/home.htm | |
Let's use the `import dataset` button to grab the spreadsheet. | |
```{r} | |
url <- "https://www.bls.gov/cpi/tables/relative-importance/2021.xlsx" | |
destfile <- "X2021.xlsx" | |
curl::curl_download(url, destfile) | |
relative_importance <- | |
read_excel(destfile, skip = 9) %>% | |
slice(-1) %>% | |
select(-4) %>% | |
rename(level = 1, item = 2, cpi_u = 3) | |
``` | |
We are going to work with `Level 1` components. A bit tricky here because we don't want the aggregates that lurk at the bottom of the spreadsheet, so not a simple filter. | |
```{r} | |
relative_importance %>% | |
filter(level == 1) | |
``` | |
We want all the level 1's that appear above `Special aggregate`. | |
```{r} | |
grepl("Special aggregate", relative_importance$item, fixed = TRUE) | |
cumsum(grepl("Special aggregate", relative_importance$item, fixed = TRUE)) | |
``` | |
Place that in a call to `filter()` and pipe to `adorn_totals()` for a sanity check. | |
```{r} | |
relative_importance %>% | |
filter(cumsum(grepl("Special aggregate", item, fixed = TRUE)) < 1) %>% | |
filter(level == 1) %>% | |
# janitor package | |
adorn_totals() | |
``` | |
```{r} | |
relative_importance %>% | |
filter(cumsum(grepl("Special aggregate", item, fixed = TRUE)) < 1) %>% | |
filter(level == 1) %>% | |
select(-level) %>% | |
arrange(-cpi_u) %>% | |
adorn_totals() %>% | |
tibble() %>% | |
gt() %>% # ggplot uses + | |
cols_label( | |
item = "", | |
cpi_u = "CPI Weight" | |
) %>% | |
fmt_percent( | |
columns = cpi_u, | |
scale_values = F | |
) | |
``` | |
We could keep going along this path of drilling down into levels. | |
```{r} | |
relative_importance %>% | |
filter(cumsum(grepl("Apparel", item, fixed = TRUE)) < 1) %>% | |
filter(cumsum(grepl("Housing", item, fixed = TRUE)) >= 1) %>% | |
# filter(level == 2) %>% | |
filter(level == 3) %>% | |
select(-level) %>% | |
arrange(-cpi_u) %>% | |
adorn_totals() %>% | |
tibble() %>% | |
gt() %>% | |
cols_label( | |
item = "", | |
cpi_u = "CPI Weight" | |
) %>% | |
fmt_percent( | |
columns = cpi_u, | |
scale_values = F | |
) %>% | |
tab_header(title = "Housing Subcomponents", | |
subtitle = "CPI Recent") | |
``` | |
Interesting stuff but not really what most people want to see when they think of `Inflation`. Let's head back to the `Level 1` components and import their index histories. The weights give context to those indexes, but the indexes get all the headlines. | |
Let's use `tribble()` to manually create a data frame with Fred codes and `item` labels. We use `item` so we can `left_join()` with our `relative_importance` tibble. | |
```{r} | |
level_1_cpi_components_manual <- | |
tribble( | |
~symbol, ~item, | |
"CPIAPPSL", "Apparel", | |
"CPIMEDSL", "Medical care", | |
"CPIHOSSL", "Housing", | |
"CPIFABSL", "Food and beverages", | |
"CPITRNSL", "Transportation", | |
"CPIEDUSL", "Education and communication", | |
"CPIRECSL", "Recreation", | |
"CPIOGSSL", "Other goods and services" | |
) | |
# alternate way of exploring with fredr | |
# need Fred API Key | |
# fredr::fredr_series_search_id("CPI") %>% | |
# filter(frequency == "Monthly", | |
# str_detect(title, "Consumer Price Index for All Urban Consumers"), | |
# seasonal_adjustment_short == "SA", | |
# !str_detect(title, "All Items")) | |
``` | |
```{r} | |
level_1_cpi_fred_symbols <- | |
relative_importance %>% | |
filter(cumsum(grepl("Special", item, fixed = TRUE)) < 1) %>% | |
filter(level == 1) %>% | |
left_join( | |
level_1_cpi_components_manual | |
) %>% | |
rename(cpi_weight = cpi_u) %>% | |
arrange(-cpi_weight) | |
``` | |
Pass the Fred symbols to `tq_get()`. | |
```{r} | |
level_1_cpi_data <- | |
level_1_cpi_fred_symbols %>% | |
pull(symbol) %>% | |
tq_get(get = "economic.data", from = "1979-01-01") %>% | |
left_join( | |
level_1_cpi_fred_symbols %>% select(-level) , | |
by = "symbol" | |
) %>% | |
select(-symbol) | |
level_1_cpi_data %>% | |
head() | |
``` | |
### Building a Table with `gt` | |
First, let's look at percent change by month. We'll do some transforming with `mutate()` and then `pivot_wider()` because people like dates running across columns. | |
```{r} | |
level_1_wide_data_for_gt <- | |
level_1_cpi_data %>% | |
group_by(item, cpi_weight) %>% | |
mutate( | |
yoy_change = price/lag(price, 12) - 1, | |
date = as.yearmon(date), | |
cpi_weight = cpi_weight | |
) %>% | |
select(-price) %>% | |
filter(date >= "2021-09-01") %>% | |
arrange(-date) %>% | |
pivot_wider(names_from = date, values_from = yoy_change) %>% | |
ungroup() | |
level_1_wide_data_for_gt | |
``` | |
Let's painstakingly turn this into a table. | |
```{r} | |
gt_table_1 <- | |
level_1_wide_data_for_gt %>% | |
gt() %>% | |
tab_header(title = "CPI Level 1 YoY % Changes") %>% | |
cols_label( | |
item = "", | |
cpi_weight = "Weight" | |
) %>% | |
fmt_percent( | |
columns = is.numeric, #contains("20") | |
decimals = 2, | |
scale_values = T | |
) %>% | |
fmt_percent( | |
columns = cpi_weight, | |
decimals = 2, | |
# Important line next for scaling values | |
scale_values = F | |
) %>% | |
data_color( | |
# columns = cpi_weight, | |
columns = contains("20"), | |
colors = scales::col_numeric( | |
colorspace::diverge_hcl(n = 20, palette = "Blue-Red 3") %>% rev(), | |
domain = c(-.15, .25)) | |
) %>% | |
tab_style( | |
style = list( | |
cell_text(color = "darkgreen") | |
), | |
locations = cells_body( | |
columns = vars(cpi_weight), | |
rows = cpi_weight > 10 | |
) | |
) | |
gt_table_1 | |
``` | |
```{r} | |
gt_table_2 <- | |
gt_table_1 %>% | |
tab_footnote(footnote = "Weights as of Feb 2022", | |
locations = cells_column_labels( | |
columns = 2 | |
)) %>% | |
tab_source_note(html("Data from BLS via <a href='https://fred.stlouisfed.org/'>FRED</a>")) %>% | |
tab_options( | |
row_group.border.top.width = px(3), | |
row_group.border.top.color = "black", | |
row_group.border.bottom.color = "black", | |
table_body.hlines.color = "white", | |
table.border.top.color = "white", | |
table.border.top.width = px(3), | |
table.border.bottom.color = "white", | |
table.border.bottom.width = px(3), | |
column_labels.border.bottom.color = "black", | |
column_labels.border.bottom.width = px(2), | |
) | |
gt_table_2 | |
``` | |
Two great packages to help us out: | |
`gtExtras` and `downloadthis` | |
```{r} | |
library(gtExtras) | |
library(downloadthis) | |
``` | |
Perhaps we want to display weights as a bar chart. We can use `gt_plt_bar_pct()` from `gtExtras`. | |
```{r} | |
gt_table_3 <- | |
gt_table_2 %>% | |
gt_plt_bar_pct( | |
column = cpi_weight, | |
scaled = T, | |
fill = "steelblue" | |
) %>% | |
cols_width( | |
cpi_weight ~px(100) | |
) | |
gt_table_3 | |
``` | |
### Add a sparkline | |
We can add a `sparkline` to display a time series chart in the table. First we need a list column - we need this data stored in one row. | |
```{r} | |
cpi_data_for_sparkline <- | |
level_1_cpi_data %>% | |
group_by(item, cpi_weight) %>% | |
filter(date > "2020-04-01") %>% | |
summarise(cpi = list(price)) | |
``` | |
Next we join that list column to our original data and use `gt_sparkline()` from `gtExtras`. | |
```{r} | |
level_1_wide_data_for_gt %>% | |
select(item, cpi_weight, contains("2022")) %>% | |
left_join( | |
cpi_data_for_sparkline, | |
by = c("item", "cpi_weight") | |
) %>% | |
gt() %>% | |
cols_label( | |
item = "", | |
cpi_weight = "Weight", | |
cpi = "Since 1990" | |
) %>% | |
fmt_percent( | |
columns = is.numeric, | |
decimals = 2, | |
) %>% | |
fmt_percent( | |
columns = cpi_weight, | |
decimals = 2, | |
scale_values = F | |
) %>% | |
data_color( | |
columns = contains("20"), | |
colors = scales::col_numeric(colorspace::diverge_hcl(n = 25, palette = "Blue-Red 3") %>% rev(), | |
domain = c(-.1, .25)) | |
) %>% | |
cols_width( | |
cpi ~ px(150) | |
) %>% | |
cols_align( | |
align = "center" | |
) %>% | |
gt_plt_bar_pct( | |
column = cpi_weight, | |
scaled = T, | |
fill = "steelblue" | |
) %>% | |
gtExtras::gt_plt_sparkline(cpi) %>% | |
gt_theme_538() %>% | |
tab_header(title = "CPI Level 1 YoY % Changes and History") %>% | |
opt_align_table_header( align = c("center")) | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment