Created
May 18, 2023 18:57
-
-
Save walkerke/0de1978bbf3d87ad7d97e3168e0fdedc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install packages (if necessary) | |
# install.packages(c("tidycensus", "tidyverse", "tmap", "mapview")) | |
# Load libraries | |
library(tidycensus) | |
library(tidyverse) | |
# Optional: if you plan to be a power user (500+ queries / day) | |
# Get a Census API key at https://api.census.gov/data/key_signup.html. | |
# Activate the key from the link in your email. | |
# Now you can get started! | |
# Restart R after running this line if you plan to install your key. | |
# Otherwise, don't worry about it. | |
install <- FALSE | |
census_api_key("YOUR KEY", install = install) | |
# Decennial Census data with `get_decennial()` | |
# 2020 data are available from the PL-94171 redistricting file | |
# Next week: the new Demographic and Housing Characteristics file! | |
# The `geography` argument controls the level of aggregation; | |
# many geographies can be optionally subset by state and / or county | |
md_pop_2020 <- get_decennial( | |
geography = "county", | |
variables = "P1_001N", | |
state = "MD", | |
year = 2020 | |
) | |
# Either print out the first few rows in your R console: | |
md_pop_2020 | |
# Or view the entire dataset in RStudio | |
View(md_pop_2020) | |
# ACS data are available with `get_acs()`. | |
# More variables, but with margins of error. | |
md_income <- get_acs( | |
geography = "tract", | |
variables = "B19013_001", | |
state = "MD", | |
year = 2021 | |
) | |
View(md_income) | |
# The `variables` argument takes one or more Census variable codes. | |
# How to identify them? | |
# `load_variables()` fetches a dataset of Census variables for | |
# a given year and dataset. | |
# For the detailed tables: | |
acs_detailed <- load_variables(2021, "acs5") | |
# For the Data Profile (pre-computed data): | |
acs_profile <- load_variables(2021, "acs5/profile") | |
View(acs_profile) | |
# Tables of Census or ACS data can be requested with a `table` argument | |
# For the ACS, use the characters that precede the underscore for the table ID | |
md_income_brackets <- get_acs( | |
geography = "county", | |
table = "B19001", | |
state = "MD", | |
year = 2021 | |
) | |
# Some analysts prefer data with variables spread across the columns; | |
# you can get this format with `output = "wide"` | |
md_income_wide <- get_acs( | |
geography = "county", | |
table = "B19001", | |
state = "MD", | |
year = 2021, | |
output = "wide" | |
) | |
# tidycensus outputs (long or wide) can be analyzed with tidyverse tools | |
# Example: create a new column, "Percent of households earning over $200k" | |
md_200k <- md_income_wide %>% | |
mutate(percent_200k = 100 * (B19001_017E / B19001_001E)) %>% | |
select(GEOID, NAME, percent_200k) %>% | |
arrange(desc(percent_200k)) | |
# Output can be visualized with the popular ggplot2 package, part of the tidyverse | |
ggplot(md_200k, aes(x = NAME, y = percent_200k)) + | |
geom_col() | |
# Not great! Let's clean it up: | |
md_200k %>% | |
mutate(NAME = str_remove(NAME, ", Maryland"), | |
NAME = ifelse(str_detect(NAME, "Baltimore"), | |
NAME, | |
str_remove(NAME, " County"))) %>% | |
ggplot(aes(x = percent_200k, y = reorder(NAME, percent_200k))) + | |
geom_col(fill = "#E21833", alpha = 0.8) + | |
theme_minimal(base_size = 12) + | |
labs(title = "Percent of households earning $200k or more", | |
subtitle = "Counties in Maryland", | |
caption = "Data source: 2017-2021 American Community Survey", | |
x = "ACS estimate (percent)", | |
y = "") | |
#### GIS workflows with tidycensus | |
# The argument `geometry = TRUE` gets you pre-joined geographic and demographic | |
# data - no need to carry out the steps yourselves! | |
md_2020_geo <- get_decennial( | |
geography = "county", | |
state = "MD", | |
variables = c(total_pop = "P2_001N", | |
hispanic = "P2_002N"), | |
year = 2020, | |
output = "wide", | |
geometry = TRUE | |
) %>% | |
mutate(percent_hispanic = 100 * (hispanic / total_pop)) | |
# To browse your data interactively, use the mapview package | |
library(mapview) | |
mapview(md_2020_geo) | |
# ggplot2 can map this data with `geom_sf()` | |
# We'll focus here on the tmap package, which offers a familiar | |
# interface to GIS users. | |
# We initialize a map with `tm_shape()`, then specify how to draw | |
# the data with `tm_polygons()`: | |
library(tmap) | |
tm_shape(md_2020_geo) + | |
tm_polygons() | |
# A choropleth map can be rendered by specifying a column to map: | |
tm_shape(md_2020_geo) + | |
tm_polygons(col = "percent_hispanic") | |
# We'll likely want to do some customization. Let's change the colors | |
# and move the legend outside the map frame: | |
tm_shape(md_2020_geo) + | |
tm_polygons(col = "percent_hispanic", palette = "Purples", | |
title = "% Hispanic, 2020") + | |
tm_layout(legend.outside = TRUE) | |
# We can also modify map elements for clarity and change | |
# the breaks from "pretty" (the default) to Jenks natural breaks | |
# (the default in ArcGIS) | |
tm_shape(md_2020_geo) + | |
tm_polygons(col = "percent_hispanic", palette = "Purples", | |
title = "% Hispanic, 2020", style = "jenks") + | |
tm_layout(legend.outside = TRUE, bg.color = "grey80") | |
# Alternative map types are also available, like graduated symbols: | |
tm_shape(md_2020_geo) + | |
tm_polygons() + | |
tm_bubbles(size = "hispanic", | |
col = "navy", | |
alpha = 0.5, | |
scale = 3, | |
title.size = "Hispanic residents, 2020") + | |
tm_layout(legend.outside = TRUE, | |
legend.outside.position = "bottom") | |
# To automatically convert to an interactive map, use `tmap_mode("view")` | |
tmap_mode("view") | |
tm_shape(md_2020_geo) + | |
tm_polygons(col = "percent_hispanic", palette = "Purples", | |
title = "% Hispanic, 2020", alpha = 0.5, | |
id = "NAME") | |
# Dot-density maps are great for visualizing neighborhood heterogeneity | |
# Let's make a race / ethnicity dot-density map for PG County | |
tmap_mode("plot") | |
dc_race <- get_decennial( | |
geography = "tract", | |
state = "DC", | |
variables = c( | |
Hispanic = "P2_002N", | |
White = "P2_005N", | |
Black = "P2_006N", | |
Asian = "P2_008N" | |
), | |
year = 2020, | |
geometry = TRUE | |
) | |
# We use the `as_dot_density()` function in tidycensus to | |
# convert to scattered dots for mapping | |
dc_dots <- as_dot_density( | |
dc_race, | |
value = "value", | |
values_per_dot = 50, | |
group = "variable" | |
) | |
background_tracts <- filter(dc_race, variable == "White") | |
# Use `tm_dots()` to make the dot map | |
tm_shape(background_tracts) + | |
tm_polygons(col = "white", | |
border.col = "grey") + | |
tm_shape(dc_dots) + | |
tm_dots(col = "variable", | |
palette = "Set1", | |
alpha = 0.5, | |
size = 0.01, | |
title = "1 dot = 50 people") + | |
tm_layout(legend.outside = TRUE, | |
title = "Race/ethnicity,\n2020 US Census") | |
# Interactive dot maps are great for exploring demographic patterns! | |
tmap_mode("view") | |
tm_basemap(leaflet::providers$CartoDB.Positron) + | |
tm_shape(dc_dots) + | |
tm_dots(col = "variable", | |
border.lwd = 0, | |
palette = "Set1", | |
size = 0.01, | |
title = "1 dot = 200 people") + | |
tm_layout(legend.outside = TRUE, | |
title = "Race/ethnicity,\n2020 US Census") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment