Last active
March 9, 2019 02:08
-
-
Save chris-prener/7399f773c7da028a079e80a808df53ac to your computer and use it in GitHub Desktop.
Parse Census Bureau address range data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse Census Bureau address range data | |
# Problem: | |
# We need a way to match incidents (that have address-level data) to the blockface they occur on. | |
# By blockface, I mean the houses on either side of a street between two cross streets (i.e. | |
# the 100-block of Main Street between 1st and 2nd Avenues). | |
# | |
# Typically, census block and city block shapefiles do not represent a blockface. Instead, | |
# they have parts of up to four different streets, typically representing half of a blockface | |
# for each of the included streets. So a block bounded by Main Street on the south, 1st Avenue | |
# on the west, Washington Street on the north, and 2nd Avenue on the east would contain addresses | |
# from each of the four streets that form the block's boundaries. Joining addresses to traditional | |
# blocks therefore produces entities that are distinct from the way we think about a blockface. | |
# | |
# We need to be able to do two things - identify incidents that occur on "marked" blocks, and then | |
# also produce counts of incidents for all blocks. | |
# | |
# The Census Bureau publishes something that could be helpful - line data with block segments.
# These include the low and high address numbers for both sides of the street (i.e. the blockface). | |
# However, I need a quick way to apply the blockface identification numbers to individual | |
# addresses in the incident data. | |
# dependencies | |
library(dplyr) | |
library(purrr) | |
library(stringr) | |
library(tidyr) | |
# custom function to expand a low/high pair into every other integer value
# between the two (i.e. the house numbers on one side of a blockface)
#
# x - vector whose first two elements are the low and high values; numeric,
#     or character that as.numeric() can coerce
#
# returns a numeric vector stepping by 2 from x[1] to x[2]. A range given
# high-to-low counts down instead of erroring, and a missing endpoint yields
# an empty vector so that side contributes no house numbers.
parse_range <- function(x){
  # convert item to numeric
  vector <- as.numeric(x)
  from <- vector[1]
  to <- vector[2]

  # no usable range on this side, so there is nothing to expand
  if (is.na(from) || is.na(to)) {
    return(numeric(0))
  }

  # seq.int() errors when the sign of `by` points away from `to`,
  # so pick the step direction from the endpoints
  step <- if (from <= to) 2 else -2

  # expand to every other integer between the two endpoints
  seq.int(from = from, to = to, by = step)
}
# sample block data: four consecutive blockfaces of Main St (the 400- through
# 700-blocks), even house numbers on the right side and odd on the left;
# only the first blockface is flagged as "marked"
blocks <- tibble(
  bfId = as.numeric(1:4),
  rightLow = 100 * 4:7,
  rightHigh = rightLow + 98,
  leftLow = rightLow + 1,
  leftHigh = rightLow + 99,
  street = rep("Main St", times = 4),
  marked = c(TRUE, rep(FALSE, times = 3))
)
# sample incident data: one row per call, identified by callId and located
# by a full street address string
incidents <- tibble(
  callId = as.numeric(101:105),
  address = c(
    "424 Main St",
    "447 Main St",
    "504 Main St",
    "667 Main St",
    "773 Main St"
  ),
  date = c(
    "1/1/14",
    "3/6/14",
    "5/12/14",
    "4/19/14",
    "2/12/14"
  ),
  call = c(
    "Graffiti",
    "Graffiti",
    "Pothole",
    "Lights Out",
    "Vacant Building"
  )
)
# convert ranges into individual records, right side of street
# each blockface row becomes one row per house number; the low/high values
# are handed to parse_range() directly rather than being pasted into a
# "low-high" string and split apart again
right <- blocks %>%
  mutate(
    house = map2(
      .x = rightLow,
      .y = rightHigh,
      .f = ~ parse_range(c(.x, .y))
    )
  ) %>%
  # name the list-column explicitly; calling unnest() without `cols` is
  # deprecated in tidyr >= 1.0.0
  unnest(cols = house) %>%
  select(bfId, house, street, marked)
# convert ranges into individual records, left side of street
# each blockface row becomes one row per house number; the low/high values
# are handed to parse_range() directly rather than being pasted into a
# "low-high" string and split apart again
left <- blocks %>%
  mutate(
    house = map2(
      .x = leftLow,
      .y = leftHigh,
      .f = ~ parse_range(c(.x, .y))
    )
  ) %>%
  # name the list-column explicitly; calling unnest() without `cols` is
  # deprecated in tidyr >= 1.0.0
  unnest(cols = house) %>%
  select(bfId, house, street, marked)
# combine left and right side of street data into one lookup table with a
# full address string for every house number
master <- bind_rows(right, left) %>%
  arrange(bfId, house) %>%
  # house is a double; coerce to integer before pasting so that round numbers
  # such as 100000 are not written in scientific notation ("1e+05 Main St"),
  # which would never match an incident address
  mutate(address = str_c(as.integer(house), street, sep = " "))
# attach the blockface id and the logical "marked" indicator to each incident
# by matching on the full address string; every incident row is kept
incidentsWithBlock <- left_join(
  incidents,
  select(master, bfId, address, marked),
  by = "address"
)
# calculate counts per blockface
# the tally is joined back onto blocks so that every blockface appears in the
# result, including those with no incidents
countsByBlock <- incidentsWithBlock %>%
  group_by(bfId) %>%
  summarise(count = n()) %>%
  left_join(blocks, ., by = "bfId") %>%
  # blockfaces with no matching incidents come out of the join as NA;
  # report them as zero incidents instead
  mutate(count = replace_na(count, 0L))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment