Created
September 13, 2021 01:57
-
-
Save lwaldron/1b849d02e688452035fad72c8fa18c3e to your computer and use it in GitHub Desktop.
NYC-COVID data merged with ACS community-level data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##### Import COVID-19 data from the NYC DOHMH GitHub repository
##### (https://github.com/nychealth/coronavirus-data) and merge it with ACS data of interest.
# To get the URL of a table of interest, open the table on GitHub and click
# 'History' in the top right corner to see the upload history for that table.
# Choose a time point of interest and click the second-to-last button on the
# right (hovering over it should show 'View at this point in the history').
# You will be directed to the table as of that point; then click 'Raw' and
# copy the URL.
covid <- read.csv("https://raw.githubusercontent.com/nychealth/coronavirus-data/7ce1b84610232be9c3f780484865a51f73b8c469/recent/recent-4-week-by-modzcta.csv")
head(covid)
# Import the ZCTA-to-MODZCTA conversion table. The COVID data are at the
# MODZCTA level, whereas the census ACS data will be extracted at the ZCTA
# level (see https://github.com/nychealth/coronavirus-data/tree/master/Geography-resources
# for an explanation of the geography), so a conversion table is needed.
geo <- read.csv("https://raw.githubusercontent.com/nychealth/coronavirus-data/7ce1b84610232be9c3f780484865a51f73b8c469/Geography-resources/ZCTA-to-MODZCTA.csv")
head(geo)
# Save the list of NYC ZCTAs into a vector, excluding rows whose MODZCTA is
# 99999 (invalid). Rows with a missing MODZCTA are dropped as well; otherwise
# the logical index would insert NA entries into the ZCTA list.
zcta <- geo$ZCTA[!is.na(geo$MODZCTA) & geo$MODZCTA != 99999]
# Go to https://api.census.gov/data/key_signup.html to get your own census API
# key for querying. The key is read from the CENSUS_KEY environment variable
# (e.g. set in ~/.Renviron) so it does not have to be stored in this script.
# If the variable is not set, the placeholder below is used instead; replace
# it with your own key.
mycensuskey <- Sys.getenv("CENSUS_KEY", unset = "XXXXXXXXXXX")
# Variables to be extracted. See https://api.census.gov/data/2019/acs/acs5/variables.html
# or https://www.socialexplorer.com/data/ACS2019_5yr/metadata/?ds=ACS19_5yr
# for the variables available in ACS.
# As an example, poverty status is extracted here. Table B06012 provides
# estimates for Place Of Birth By Poverty Status In The Past 12 Months In The
# United States. B06012_001E is the denominator (total) and B06012_002E is the
# numerator (estimated number of people living below 100% of the poverty level).
povvar <- c("B06012_001E", "B06012_002E")
# Concatenate all the ZCTAs into one comma-separated string (this will be fed
# into the census API query).
zcta.string <- paste(zcta, collapse = ",")
# Pull ACS data: the 2019 5-year estimates of the selected variables for the
# ZCTAs listed in zcta.string, queried within state:36.
acsdata5y <- censusapi::getCensus(
  name = "acs/acs5",
  vintage = 2019,
  key = mycensuskey,
  vars = povvar,
  region = paste0("zip code tabulation area:", zcta.string),
  regionin = "state:36"
)
head(acsdata5y)
library(tidyverse)
# Merge acsdata5y with geo so that every ZCTA-level ACS row carries its
# MODZCTA code and can be aggregated at the MODZCTA level.
acsdata5y_modzcta <- merge(
  acsdata5y, geo,
  by.x = "zip_code_tabulation_area", by.y = "ZCTA"
)
# Sum the numerator and denominator within each MODZCTA, then compute the
# percentage of people living in poverty in each MODZCTA.
acsdata5y_modzcta_new <- acsdata5y_modzcta %>%
  group_by(MODZCTA) %>%
  summarise(
    B06012_001E = sum(B06012_001E),
    B06012_002E = sum(B06012_002E)
  ) %>%
  mutate(poverty = B06012_002E / B06012_001E * 100)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment