Created
July 2, 2022 20:54
-
-
Save py/3295f0bde84cfc0c1f1dd74659ac823a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------------------------------------------------------------# | |
# Generate a file with county-level decennial census data 1900 to 2020 | |
# Author: Peter Y (github: py) | |
# Date: 2022-07-01 | |
# Inputs: | |
# - NBER population file 1900 to 1990 (https://data.nber.org/census/pop/cencounts.csv)
# - Census API calls for 2000 to present | |
# Outputs: | |
# An R tibble with a row for each US county and a column for each decade 1900 to 2020 | |
# with that county's population. The NBER file does include rows for US total and each | |
# state. These rows were removed. | |
#------------------------------------------------------------------------------# | |
# Load libraries ----- | |
library(tidyverse) | |
library(tidycensus) | |
library(stringr) | |
# Load Census API Key -----
# Obtain your own key at https://api.census.gov/data/key_signup.html and make
# it available to R as the CENSUS_API_KEY environment variable (for example in
# ~/.Renviron). The key is read from the environment rather than hard-coded so
# it never ends up in the script; a missing key stops the run immediately with
# an actionable message instead of failing later inside an API call.
api_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(api_key)) {
  stop(
    "CENSUS_API_KEY is not set. Request a key at ",
    "https://api.census.gov/data/key_signup.html",
    call. = FALSE
  )
}
census_api_key(api_key)
# Load existing NBER file -----
# Read fips as character instead of letting readr guess its type: the join on
# fips and the str_sub() filter on its 3rd-5th characters further down both
# need a text code with any leading zeros intact.
df <- read_csv(
  "https://data.nber.org/census/pop/cencounts.csv",
  col_types = cols(fips = col_character())
)
# Convert population cols (the first ten columns) to numeric.
# read_csv() already returns a tibble, so no as_tibble() conversion is needed.
df <- df %>%
  mutate(across(1:10, as.numeric))
# Obtain 2000, 2010, and 2020 population data -----

# Fetch one county-level decennial census variable and return a two-column
# tibble: `fips` (the GEOID column returned by get_decennial()) and the count,
# stored in a column named by `pop_col`.
#   year     - census year passed to get_decennial()
#   variable - census variable code to request
#   pop_col  - name to give the resulting population column
get_county_pop <- function(year, variable, pop_col) {
  counts <- get_decennial(
    geography = "county",
    variables = c(pop = variable),
    year = year,
    geometry = FALSE,
    show_call = TRUE
  )
  counts %>%
    select(GEOID, value) %>%
    setNames(c("fips", pop_col))
}

# Use the total-population variable for every year. The 2000 and 2010 pulls
# previously requested H010001, a housing-table count (population in occupied
# housing units, i.e. without group quarters -- NOTE(review): confirm against
# the SF1 variable list), which is not comparable with the 2020 total
# (P1_001N) or with the NBER totals for 1900-1990.
p2000.df <- get_county_pop(2000, "P001001", "pop2000")
p2010.df <- get_county_pop(2010, "P001001", "pop2010")
p2020.df <- get_county_pop(2020, "P1_001N", "pop2020")
# Join 2000 - 2020 to existing file -----
df <- df %>%
  # Add col for state: the first two characters of `name`
  # (presumably the state abbreviation -- verify against the NBER file).
  mutate(state = str_extract(name, "..")) %>%
  # Reorder cols: identifiers first, then the ten population columns that
  # occupy positions 1-10 of the incoming table.
  select(fips, name, state, 1:10) %>%
  # Join each later census on fips. The key is spelled out so the joins do not
  # depend on whichever column names happen to be shared, and stay silent.
  # left_join keeps every row of the NBER table; counties without a match in a
  # given census get NA for that year.
  left_join(p2000.df, by = "fips") %>%
  left_join(p2010.df, by = "fips") %>%
  left_join(p2020.df, by = "fips")
# Remove rows for US and state totals
# (rows whose fips has "000" in character positions 3-5)
df <- df %>%
  filter(str_sub(fips, 3, 5) != "000")
# Save to CSV
# Create the output directory first: write_csv() does not create missing
# folders and would otherwise fail on a fresh checkout.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
write_csv(df, "data/pop_county_decade_1900_2020.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See this gist for the csv output.