Skip to content

Instantly share code, notes, and snippets.

@walkerke
Created May 18, 2023 18:57
Show Gist options
  • Save walkerke/0de1978bbf3d87ad7d97e3168e0fdedc to your computer and use it in GitHub Desktop.
Save walkerke/0de1978bbf3d87ad7d97e3168e0fdedc to your computer and use it in GitHub Desktop.
# Install packages (if necessary)
# install.packages(c("tidycensus", "tidyverse", "tmap", "mapview"))
# Load libraries
library(tidycensus)
library(tidyverse)
# Optional: if you plan to be a power user (500+ queries / day)
# Get a Census API key at https://api.census.gov/data/key_signup.html.
# Activate the key from the link in your email.
# Now you can get started!
# Restart R after running this line if you plan to install your key.
# Otherwise, don't worry about it.
install <- FALSE
census_api_key("YOUR KEY", install = install)
# Decennial Census data with `get_decennial()`
# 2020 data are available from the PL-94171 redistricting file
# Next week: the new Demographic and Housing Characteristics file!
# The `geography` argument controls the level of aggregation;
# many geographies can be optionally subset by state and / or county
md_pop_2020 <- get_decennial(
geography = "county",
variables = "P1_001N",
state = "MD",
year = 2020
)
# Either print out the first few rows in your R console:
md_pop_2020
# Or view the entire dataset in RStudio
View(md_pop_2020)
# ACS data are available with `get_acs()`.
# More variables, but with margins of error.
md_income <- get_acs(
geography = "tract",
variables = "B19013_001",
state = "MD",
year = 2021
)
View(md_income)
# The `variables` argument takes one or more Census variable codes.
# How to identify them?
# `load_variables()` fetches a dataset of Census variables for
# a given year and dataset.
# For the detailed tables:
acs_detailed <- load_variables(2021, "acs5")
# For the Data Profile (pre-computed data):
acs_profile <- load_variables(2021, "acs5/profile")
View(acs_profile)
# Tables of Census or ACS data can be requested with a `table` argument
# For the ACS, use the characters that precede the underscore for the table ID
md_income_brackets <- get_acs(
geography = "county",
table = "B19001",
state = "MD",
year = 2021
)
# Some analysts prefer data with variables spread across the columns;
# you can get this format with `output = "wide"`
md_income_wide <- get_acs(
geography = "county",
table = "B19001",
state = "MD",
year = 2021,
output = "wide"
)
# tidycensus outputs (long or wide) can be analyzed with tidyverse tools
# Example: create a new column, "Percent of households earning over $200k"
md_200k <- md_income_wide %>%
mutate(percent_200k = 100 * (B19001_017E / B19001_001E)) %>%
select(GEOID, NAME, percent_200k) %>%
arrange(desc(percent_200k))
# Output can be visualized with the popular ggplot2 package, part of the tidyverse
ggplot(md_200k, aes(x = NAME, y = percent_200k)) +
geom_col()
# Not great! Let's clean it up:
md_200k %>%
mutate(NAME = str_remove(NAME, ", Maryland"),
NAME = ifelse(str_detect(NAME, "Baltimore"),
NAME,
str_remove(NAME, " County"))) %>%
ggplot(aes(x = percent_200k, y = reorder(NAME, percent_200k))) +
geom_col(fill = "#E21833", alpha = 0.8) +
theme_minimal(base_size = 12) +
labs(title = "Percent of households earning $200k or more",
subtitle = "Counties in Maryland",
caption = "Data source: 2017-2021 American Community Survey",
x = "ACS estimate (percent)",
y = "")
#### GIS workflows with tidycensus
# The argument `geometry = TRUE` gets you pre-joined geographic and demographic
# data - no need to carry out the steps yourselves!
md_2020_geo <- get_decennial(
geography = "county",
state = "MD",
variables = c(total_pop = "P2_001N",
hispanic = "P2_002N"),
year = 2020,
output = "wide",
geometry = TRUE
) %>%
mutate(percent_hispanic = 100 * (hispanic / total_pop))
# To browse your data interactively, use the mapview package
library(mapview)
mapview(md_2020_geo)
# ggplot2 can map this data with `geom_sf()`
# We'll focus here on the tmap package, which offers a familiar
# interface to GIS users.
# We initialize a map with `tm_shape()`, then specify how to draw
# the data with `tm_polygons()`:
library(tmap)
tm_shape(md_2020_geo) +
tm_polygons()
# A choropleth map can be rendered by specifying a column to map:
tm_shape(md_2020_geo) +
tm_polygons(col = "percent_hispanic")
# We'll likely want to do some customization. Let's change the colors
# and move the legend outside the map frame:
tm_shape(md_2020_geo) +
tm_polygons(col = "percent_hispanic", palette = "Purples",
title = "% Hispanic, 2020") +
tm_layout(legend.outside = TRUE)
# We can also modify map elements for clarity and change
# the breaks from "pretty" (the default) to Jenks natural breaks
# (the default in ArcGIS)
tm_shape(md_2020_geo) +
tm_polygons(col = "percent_hispanic", palette = "Purples",
title = "% Hispanic, 2020", style = "jenks") +
tm_layout(legend.outside = TRUE, bg.color = "grey80")
# Alternative map types are also available, like graduated symbols:
tm_shape(md_2020_geo) +
tm_polygons() +
tm_bubbles(size = "hispanic",
col = "navy",
alpha = 0.5,
scale = 3,
title.size = "Hispanic residents, 2020") +
tm_layout(legend.outside = TRUE,
legend.outside.position = "bottom")
# To automatically convert to an interactive map, use `tmap_mode("view")`
tmap_mode("view")
tm_shape(md_2020_geo) +
tm_polygons(col = "percent_hispanic", palette = "Purples",
title = "% Hispanic, 2020", alpha = 0.5,
id = "NAME")
# Dot-density maps are great for visualizing neighborhood heterogeneity
# Let's make a race / ethnicity dot-density map for PG County
tmap_mode("plot")
dc_race <- get_decennial(
geography = "tract",
state = "DC",
variables = c(
Hispanic = "P2_002N",
White = "P2_005N",
Black = "P2_006N",
Asian = "P2_008N"
),
year = 2020,
geometry = TRUE
)
# We use the `as_dot_density()` function in tidycensus to
# convert to scattered dots for mapping
dc_dots <- as_dot_density(
dc_race,
value = "value",
values_per_dot = 50,
group = "variable"
)
background_tracts <- filter(dc_race, variable == "White")
# Use `tm_dots()` to make the dot map
tm_shape(background_tracts) +
tm_polygons(col = "white",
border.col = "grey") +
tm_shape(dc_dots) +
tm_dots(col = "variable",
palette = "Set1",
alpha = 0.5,
size = 0.01,
title = "1 dot = 50 people") +
tm_layout(legend.outside = TRUE,
title = "Race/ethnicity,\n2020 US Census")
# Interactive dot maps are great for exploring demographic patterns!
tmap_mode("view")
tm_basemap(leaflet::providers$CartoDB.Positron) +
tm_shape(dc_dots) +
tm_dots(col = "variable",
border.lwd = 0,
palette = "Set1",
size = 0.01,
title = "1 dot = 200 people") +
tm_layout(legend.outside = TRUE,
title = "Race/ethnicity,\n2020 US Census")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment