lwaldron · September 13, 2021 01:57
diff --git a/NYC-COVID_ACS_merge b/NYC-COVID_ACS_merge
 ##### Import COVID-19 data from the NYC DOHMH GitHub repository (https://github.com/nychealth/coronavirus-data) and merge it with ACS data of interest

 # To get the URL of a table as it existed at a given time, open the table on GitHub and click 'History' in
 # the top right corner to see its upload history. Pick the time point of interest and click the
 # second-to-last button on the right (hovering over it should show 'View at this point in the history').
 # On the page that opens, click 'Raw' and copy the URL.

 covid <- read.csv(
   file = "https://raw.githubusercontent.com/nychealth/coronavirus-data/7ce1b84610232be9c3f780484865a51f73b8c469/recent/recent-4-week-by-modzcta.csv"
 )
 # Show the first rows of the imported table
 head(covid)

 # Import the table for converting ZCTA to MODZCTA geography. The COVID data are at the MODZCTA level whereas
 # the census ACS data will be extracted at the ZCTA level
 # (see https://github.com/nychealth/coronavirus-data/tree/master/Geography-resources for an explanation of
 # the geography). That's why we need a ZCTA to MODZCTA conversion table.

 geo <- read.csv("https://raw.githubusercontent.com/nychealth/coronavirus-data/7ce1b84610232be9c3f780484865a51f73b8c469/Geography-resources/ZCTA-to-MODZCTA.csv")
 head(geo)

 # Save the list of NYC ZCTAs into a vector, excluding those mapped to MODZCTA 99999 (invalid).
 # which() is used instead of a bare logical index so that a row with a missing MODZCTA is dropped
 # rather than adding an NA to the vector (an NA here would later be pasted into the census API
 # query as the text "NA").
 zcta <- geo$ZCTA[which(geo$MODZCTA != 99999)]

 # Census API key. Go to https://api.census.gov/data/key_signup.html to get your own key for querying.
 # The key is taken from the CENSUS_KEY environment variable when it is set (the same variable
 # censusapi::getCensus() uses as the default for its `key` argument), so a personal key does not
 # have to be written into this script. Otherwise, replace the placeholder string with your own key.
 mycensuskey <- Sys.getenv("CENSUS_KEY")
 if (!nzchar(mycensuskey)) {
   mycensuskey <- "XXXXXXXXXXX"
 }

 # Variables to be extracted. You can go to https://api.census.gov/data/2019/acs/acs5/variables.html or
 # https://www.socialexplorer.com/data/ACS2019_5yr/metadata/?ds=ACS19_5yr to see what variables are available in ACS.
 # As an example, poverty status is extracted here. The B06012 table provides estimates for Place Of Birth By
 # Poverty Status In The Past 12 Months In The United States. B06012_001E is the denominator (total) and B06012_002E
 # is the numerator (estimate for # of people living below 100 % of the poverty level)
 povvar <- c("B06012_001E", "B06012_002E")


 # Concatenate all the ZCTAs into one comma-separated string (this will be fed into the census API query)
 zcta.string <- paste(zcta, collapse = ",")


 # Pull the 2019 ACS 5-year data for the selected ZCTAs within state 36
 acsdata5y <- censusapi::getCensus(
   name = "acs/acs5",
   vintage = 2019,
   key = mycensuskey,
   vars = povvar,
   region = paste0("zip code tabulation area:", zcta.string),
   regionin = "state:36"
 )

 head(acsdata5y)


 library(tidyverse)

 # Attach the MODZCTA code to every ACS row by joining on the ZCTA code
 # (merge() keeps only the ZCTAs that appear in both tables).
 acsdata5y_modzcta <- merge(
   x = acsdata5y,
   y = geo,
   by.x = "zip_code_tabulation_area",
   by.y = "ZCTA"
 )

 # Aggregate to the MODZCTA level: add up the total and the below-poverty counts of the
 # ZCTAs in each MODZCTA, then express the below-poverty count as a percentage of the total.
 acsdata5y_modzcta_new <- acsdata5y_modzcta %>%
   group_by(MODZCTA) %>%
   summarise(
     B06012_001E = sum(B06012_001E),
     B06012_002E = sum(B06012_002E)
   ) %>%
   mutate(poverty = B06012_002E / B06012_001E * 100)
	##### Import COVID-19 data from the NYC DOHMH GitHub repository (https://github.com/nychealth/coronavirus-data) and merge it with ACS data of interest

	# To get the URL of a table as it existed at a given time, open the table on GitHub and click 'History' in
	# the top right corner to see its upload history. Pick the time point of interest and click the
	# second-to-last button on the right (hovering over it should show 'View at this point in the history').
	# On the page that opens, click 'Raw' and copy the URL.

	covid <- read.csv(
	  file = "https://raw.githubusercontent.com/nychealth/coronavirus-data/7ce1b84610232be9c3f780484865a51f73b8c469/recent/recent-4-week-by-modzcta.csv"
	)
	# Show the first rows of the imported table
	head(covid)

	# Import the table for converting ZCTA to MODZCTA geography. The COVID data are at the MODZCTA level whereas
	# the census ACS data will be extracted at the ZCTA level
	# (see https://github.com/nychealth/coronavirus-data/tree/master/Geography-resources for an explanation of
	# the geography). That's why we need a ZCTA to MODZCTA conversion table.

	geo <- read.csv("https://raw.githubusercontent.com/nychealth/coronavirus-data/7ce1b84610232be9c3f780484865a51f73b8c469/Geography-resources/ZCTA-to-MODZCTA.csv")
	head(geo)

	# Save the list of NYC ZCTAs into a vector, excluding those mapped to MODZCTA 99999 (invalid).
	# which() is used instead of a bare logical index so that a row with a missing MODZCTA is dropped
	# rather than adding an NA to the vector (an NA here would later be pasted into the census API
	# query as the text "NA").
	zcta <- geo$ZCTA[which(geo$MODZCTA != 99999)]

	# Census API key. Go to https://api.census.gov/data/key_signup.html to get your own key for querying.
	# The key is taken from the CENSUS_KEY environment variable when it is set (the same variable
	# censusapi::getCensus() uses as the default for its `key` argument), so a personal key does not
	# have to be written into this script. Otherwise, replace the placeholder string with your own key.
	mycensuskey <- Sys.getenv("CENSUS_KEY")
	if (!nzchar(mycensuskey)) {
	  mycensuskey <- "XXXXXXXXXXX"
	}

	# Variables to be extracted. You can go to https://api.census.gov/data/2019/acs/acs5/variables.html or
	# https://www.socialexplorer.com/data/ACS2019_5yr/metadata/?ds=ACS19_5yr to see what variables are available in ACS.
	# As an example, poverty status is extracted here. The B06012 table provides estimates for Place Of Birth By
	# Poverty Status In The Past 12 Months In The United States. B06012_001E is the denominator (total) and B06012_002E
	# is the numerator (estimate for # of people living below 100 % of the poverty level)
	povvar <- c("B06012_001E", "B06012_002E")


	# Concatenate all the ZCTAs into one comma-separated string (this will be fed into the census API query)
	zcta.string <- paste(zcta, collapse = ",")


	# Pull the 2019 ACS 5-year data for the selected ZCTAs within state 36
	acsdata5y <- censusapi::getCensus(
	  name = "acs/acs5",
	  vintage = 2019,
	  key = mycensuskey,
	  vars = povvar,
	  region = paste0("zip code tabulation area:", zcta.string),
	  regionin = "state:36"
	)

	head(acsdata5y)


	library(tidyverse)

	# Attach the MODZCTA code to every ACS row by joining on the ZCTA code
	# (merge() keeps only the ZCTAs that appear in both tables).
	acsdata5y_modzcta <- merge(
	  x = acsdata5y,
	  y = geo,
	  by.x = "zip_code_tabulation_area",
	  by.y = "ZCTA"
	)

	# Aggregate to the MODZCTA level: add up the total and the below-poverty counts of the
	# ZCTAs in each MODZCTA, then express the below-poverty count as a percentage of the total.
	acsdata5y_modzcta_new <- acsdata5y_modzcta %>%
	  group_by(MODZCTA) %>%
	  summarise(
	    B06012_001E = sum(B06012_001E),
	    B06012_002E = sum(B06012_002E)
	  ) %>%
	  mutate(poverty = B06012_002E / B06012_001E * 100)