walkerke · May 18, 2023 18:57
diff --git a/rgovys-script.R b/rgovys-script.R
 # Install packages (if necessary)
 # install.packages(c("tidycensus", "tidyverse", "tmap", "mapview"))

 # Load libraries
 library(tidycensus)
 library(tidyverse)

 # Optional: if you plan to be a power user (500+ queries / day)
 # Get a Census API key at https://api.census.gov/data/key_signup.html.  
 # Activate the key from the link in your email.  
 # Now you can get started!
 # Restart R after running this line if you plan to install your key.  
 # Otherwise, don't worry about it.  
 install <- FALSE
 census_api_key("YOUR KEY", install = install)

 # Decennial Census data with `get_decennial()`
 # 2020 data are available from the PL-94171 redistricting file
 # Next week: the new Demographic and Housing Characteristics file!

 # The `geography` argument controls the level of aggregation;
 # many geographies can be optionally subset by state and / or county
 md_pop_2020 <- get_decennial(
  geography = "county",
  variables = "P1_001N",
  state = "MD",
  year = 2020
 )

 # Either print out the first few rows in your R console:
 md_pop_2020

 # Or view the entire dataset in RStudio
 View(md_pop_2020)

 # ACS data are available with `get_acs()`.  
 # More variables, but with margins of error.
 md_income <- get_acs(
  geography = "tract",
  variables = "B19013_001",
  state = "MD",
  year = 2021
 )

 View(md_income)

 # The `variables` argument takes one or more Census variable codes.  
 # How to identify them?
 # `load_variables()` fetches a dataset of Census variables for 
 # a given year and dataset.
 # For the detailed tables:
 acs_detailed <- load_variables(2021, "acs5")
 # For the Data Profile (pre-computed data): 
 acs_profile <- load_variables(2021, "acs5/profile")

 View(acs_profile)

 # Tables of Census or ACS data can be requested with a `table` argument
 # For the ACS, use the characters that precede the underscore for the table ID
 md_income_brackets <- get_acs(
  geography = "county",
  table = "B19001",
  state = "MD",
  year = 2021
 )

 # Some analysts prefer data with variables spread across the columns;
 # you can get this format with `output = "wide"`
 md_income_wide <- get_acs(
  geography = "county",
  table = "B19001",
  state = "MD",
  year = 2021,
  output = "wide"
 )

 # tidycensus outputs (long or wide) can be analyzed with tidyverse tools
 # Example: create a new column, "Percent of households earning over $200k"
 md_200k <- md_income_wide %>%
  mutate(percent_200k = 100 * (B19001_017E / B19001_001E)) %>%
  select(GEOID, NAME, percent_200k) %>%
  arrange(desc(percent_200k))

 # Output can be visualized with the popular ggplot2 package, part of the tidyverse
 ggplot(md_200k, aes(x = NAME, y = percent_200k)) + 
  geom_col()

 # Not great! Let's clean it up:
 md_200k %>%
  mutate(NAME = str_remove(NAME, ", Maryland"),
         NAME = ifelse(str_detect(NAME, "Baltimore"), 
                       NAME,
                       str_remove(NAME, " County"))) %>%
  ggplot(aes(x = percent_200k, y = reorder(NAME, percent_200k))) + 
  geom_col(fill = "#E21833", alpha = 0.8) + 
  theme_minimal(base_size = 12) + 
  labs(title = "Percent of households earning $200k or more",
       subtitle = "Counties in Maryland",
       caption = "Data source: 2017-2021 American Community Survey",
       x = "ACS estimate (percent)",
       y = "")


 #### GIS workflows with tidycensus

 # The argument `geometry = TRUE` gets you pre-joined geographic and demographic
 # data - no need to carry out the steps yourselves!
 md_2020_geo <- get_decennial(
  geography = "county",
  state = "MD",
  variables = c(total_pop = "P2_001N", 
                hispanic = "P2_002N"),
  year = 2020,
  output = "wide",
  geometry = TRUE
 ) %>%
  mutate(percent_hispanic = 100 * (hispanic / total_pop))

 # To browse your data interactively, use the mapview package
 library(mapview)

 mapview(md_2020_geo)


 # ggplot2 can map this data with `geom_sf()`
 # We'll focus here on the tmap package, which offers a familiar 
 # interface to GIS users. 
 # We initialize a map with `tm_shape()`, then specify how to draw 
 # the data with `tm_polygons()`:
 library(tmap)

 tm_shape(md_2020_geo) + 
  tm_polygons()

 # A choropleth map can be rendered by specifying a column to map:
 tm_shape(md_2020_geo) + 
  tm_polygons(col = "percent_hispanic")

 # We'll likely want to do some customization. Let's change the colors
 # and move the legend outside the map frame:
 tm_shape(md_2020_geo) + 
  tm_polygons(col = "percent_hispanic", palette = "Purples",
              title = "% Hispanic, 2020") + 
  tm_layout(legend.outside = TRUE)

 # We can also modify map elements for clarity and change
 # the breaks from "pretty" (the default) to Jenks natural breaks
 # (the default in ArcGIS)
 tm_shape(md_2020_geo) + 
  tm_polygons(col = "percent_hispanic", palette = "Purples",
              title = "% Hispanic, 2020", style = "jenks") + 
  tm_layout(legend.outside = TRUE, bg.color = "grey80")

 # Alternative map types are also available, like graduated symbols:
 tm_shape(md_2020_geo) + 
  tm_polygons() + 
  tm_bubbles(size = "hispanic", 
             col = "navy",
             alpha = 0.5,
             scale = 3,
             title.size = "Hispanic residents, 2020") + 
  tm_layout(legend.outside = TRUE,
            legend.outside.position = "bottom")

 # To automatically convert to an interactive map, use `tmap_mode("view")`
 tmap_mode("view")

 tm_shape(md_2020_geo) + 
  tm_polygons(col = "percent_hispanic", palette = "Purples",
              title = "% Hispanic, 2020", alpha = 0.5,
              id = "NAME") 

 # Dot-density maps are great for visualizing neighborhood heterogeneity
 # Let's make a race / ethnicity dot-density map for PG County
 tmap_mode("plot")

 dc_race <- get_decennial(
  geography = "tract",
  state = "DC",
  variables = c(
    Hispanic = "P2_002N",
    White = "P2_005N",
    Black = "P2_006N",
    Asian = "P2_008N"
  ),
  year = 2020,
  geometry = TRUE
 ) 

 # We use the `as_dot_density()` function in tidycensus to 
 # convert to scattered dots for mapping
 dc_dots <- as_dot_density(
  dc_race,
  value = "value",
  values_per_dot = 50,
  group = "variable"
 )

 background_tracts <- filter(dc_race, variable == "White")

 # Use `tm_dots()` to make the dot map
 tm_shape(background_tracts) + 
  tm_polygons(col = "white", 
              border.col = "grey") + 
  tm_shape(dc_dots) +
  tm_dots(col = "variable", 
          palette = "Set1",
          alpha = 0.5,
          size = 0.01, 
          title = "1 dot = 50 people") + 
  tm_layout(legend.outside = TRUE,
            title = "Race/ethnicity,\n2020 US Census")

 # Interactive dot maps are great for exploring demographic patterns!
 tmap_mode("view")

 tm_basemap(leaflet::providers$CartoDB.Positron) + 
  tm_shape(dc_dots) +
  tm_dots(col = "variable",
          border.lwd = 0,
          palette = "Set1",
          size = 0.01, 
          title = "1 dot = 200 people") + 
  tm_layout(legend.outside = TRUE,
            title = "Race/ethnicity,\n2020 US Census")
	# Install packages (if necessary)
	# install.packages(c("tidycensus", "tidyverse", "tmap", "mapview"))

	# Load libraries
	library(tidycensus)
	library(tidyverse)

	# Optional: if you plan to be a power user (500+ queries / day)
	# Get a Census API key at https://api.census.gov/data/key_signup.html.
	# Activate the key from the link in your email.
	# Now you can get started!
	# Restart R after running this line if you plan to install your key.
	# Otherwise, don't worry about it.
	install <- FALSE
	census_api_key("YOUR KEY", install = install)

	# Decennial Census data with `get_decennial()`
	# 2020 data are available from the PL-94171 redistricting file
	# Next week: the new Demographic and Housing Characteristics file!

	# The `geography` argument controls the level of aggregation;
	# many geographies can be optionally subset by state and / or county
	md_pop_2020 <- get_decennial(
	geography = "county",
	variables = "P1_001N",
	state = "MD",
	year = 2020
	)

	# Either print out the first few rows in your R console:
	md_pop_2020

	# Or view the entire dataset in RStudio
	View(md_pop_2020)

	# ACS data are available with `get_acs()`.
	# More variables, but with margins of error.
	md_income <- get_acs(
	geography = "tract",
	variables = "B19013_001",
	state = "MD",
	year = 2021
	)

	View(md_income)

	# The `variables` argument takes one or more Census variable codes.
	# How to identify them?
	# `load_variables()` fetches a dataset of Census variables for
	# a given year and dataset.
	# For the detailed tables:
	acs_detailed <- load_variables(2021, "acs5")
	# For the Data Profile (pre-computed data):
	acs_profile <- load_variables(2021, "acs5/profile")

	View(acs_profile)

	# Tables of Census or ACS data can be requested with a `table` argument
	# For the ACS, use the characters that precede the underscore for the table ID
	md_income_brackets <- get_acs(
	geography = "county",
	table = "B19001",
	state = "MD",
	year = 2021
	)

	# Some analysts prefer data with variables spread across the columns;
	# you can get this format with `output = "wide"`
	md_income_wide <- get_acs(
	geography = "county",
	table = "B19001",
	state = "MD",
	year = 2021,
	output = "wide"
	)

	# tidycensus outputs (long or wide) can be analyzed with tidyverse tools
	# Example: create a new column, "Percent of households earning over $200k"
	md_200k <- md_income_wide %>%
	mutate(percent_200k = 100 * (B19001_017E / B19001_001E)) %>%
	select(GEOID, NAME, percent_200k) %>%
	arrange(desc(percent_200k))

	# Output can be visualized with the popular ggplot2 package, part of the tidyverse
	ggplot(md_200k, aes(x = NAME, y = percent_200k)) +
	geom_col()

	# Not great! Let's clean it up:
	md_200k %>%
	mutate(NAME = str_remove(NAME, ", Maryland"),
	NAME = ifelse(str_detect(NAME, "Baltimore"),
	NAME,
	str_remove(NAME, " County"))) %>%
	ggplot(aes(x = percent_200k, y = reorder(NAME, percent_200k))) +
	geom_col(fill = "#E21833", alpha = 0.8) +
	theme_minimal(base_size = 12) +
	labs(title = "Percent of households earning $200k or more",
	subtitle = "Counties in Maryland",
	caption = "Data source: 2017-2021 American Community Survey",
	x = "ACS estimate (percent)",
	y = "")


	#### GIS workflows with tidycensus

	# The argument `geometry = TRUE` gets you pre-joined geographic and demographic
	# data - no need to carry out the steps yourselves!
	md_2020_geo <- get_decennial(
	geography = "county",
	state = "MD",
	variables = c(total_pop = "P2_001N",
	hispanic = "P2_002N"),
	year = 2020,
	output = "wide",
	geometry = TRUE
	) %>%
	mutate(percent_hispanic = 100 * (hispanic / total_pop))

	# To browse your data interactively, use the mapview package
	library(mapview)

	mapview(md_2020_geo)


	# ggplot2 can map this data with `geom_sf()`
	# We'll focus here on the tmap package, which offers a familiar
	# interface to GIS users.
	# We initialize a map with `tm_shape()`, then specify how to draw
	# the data with `tm_polygons()`:
	library(tmap)

	tm_shape(md_2020_geo) +
	tm_polygons()

	# A choropleth map can be rendered by specifying a column to map:
	tm_shape(md_2020_geo) +
	tm_polygons(col = "percent_hispanic")

	# We'll likely want to do some customization. Let's change the colors
	# and move the legend outside the map frame:
	tm_shape(md_2020_geo) +
	tm_polygons(col = "percent_hispanic", palette = "Purples",
	title = "% Hispanic, 2020") +
	tm_layout(legend.outside = TRUE)

	# We can also modify map elements for clarity and change
	# the breaks from "pretty" (the default) to Jenks natural breaks
	# (the default in ArcGIS)
	tm_shape(md_2020_geo) +
	tm_polygons(col = "percent_hispanic", palette = "Purples",
	title = "% Hispanic, 2020", style = "jenks") +
	tm_layout(legend.outside = TRUE, bg.color = "grey80")

	# Alternative map types are also available, like graduated symbols:
	tm_shape(md_2020_geo) +
	tm_polygons() +
	tm_bubbles(size = "hispanic",
	col = "navy",
	alpha = 0.5,
	scale = 3,
	title.size = "Hispanic residents, 2020") +
	tm_layout(legend.outside = TRUE,
	legend.outside.position = "bottom")

	# To automatically convert to an interactive map, use `tmap_mode("view")`
	tmap_mode("view")

	tm_shape(md_2020_geo) +
	tm_polygons(col = "percent_hispanic", palette = "Purples",
	title = "% Hispanic, 2020", alpha = 0.5,
	id = "NAME")

	# Dot-density maps are great for visualizing neighborhood heterogeneity
	# Let's make a race / ethnicity dot-density map for PG County
	tmap_mode("plot")

	dc_race <- get_decennial(
	geography = "tract",
	state = "DC",
	variables = c(
	Hispanic = "P2_002N",
	White = "P2_005N",
	Black = "P2_006N",
	Asian = "P2_008N"
	),
	year = 2020,
	geometry = TRUE
	)

	# We use the `as_dot_density()` function in tidycensus to
	# convert to scattered dots for mapping
	dc_dots <- as_dot_density(
	dc_race,
	value = "value",
	values_per_dot = 50,
	group = "variable"
	)

	background_tracts <- filter(dc_race, variable == "White")

	# Use `tm_dots()` to make the dot map
	tm_shape(background_tracts) +
	tm_polygons(col = "white",
	border.col = "grey") +
	tm_shape(dc_dots) +
	tm_dots(col = "variable",
	palette = "Set1",
	alpha = 0.5,
	size = 0.01,
	title = "1 dot = 50 people") +
	tm_layout(legend.outside = TRUE,
	title = "Race/ethnicity,\n2020 US Census")

	# Interactive dot maps are great for exploring demographic patterns!
	tmap_mode("view")

	tm_basemap(leaflet::providers$CartoDB.Positron) +
	tm_shape(dc_dots) +
	tm_dots(col = "variable",
	border.lwd = 0,
	palette = "Set1",
	size = 0.01,
	title = "1 dot = 200 people") +
	tm_layout(legend.outside = TRUE,
	title = "Race/ethnicity,\n2020 US Census")