chris-prener · March 9, 2019 02:08
diff --git a/parse-bocks.R b/parse-bocks.R
 # Parse Census Bureau address range data

 # Problem:
 # We need a way to match incidents (that have address-level data) to the blockface they occur on.
 # By blockface, I mean the houses on either side of a street between two cross streets (i.e.
 # the 100-block of Main Street between 1st and 2nd Avenues).
 #
 # Typically, census block and city block shapefiles do not represent a blockface. Instead, 
 # they have parts of up to four different streets, typically representing half of a blockface 
 # for each of the included streets. So a block bounded by Main Street on the south, 1st Avenue
 # on the west, Washington Street on the north, and 2nd Avenue on the east would contain addresses
 # from each of the four streets that form the block's boundaries. Joining addresses to traditional
 # blocks therefore produces entities that are distinct from the way we think about a blockface.
 #
 # We need to be able to do two things - identify incidents that occur on "marked" blocks, and then
 # also produce counts of incidents for all blocks.
 #
 # The Census Bureau publishes something that could be helpful - line data with block segments.
 # These include the low and high address numbers for both sides of the street (i.e. the blockface).
 # However, I need a quick way to apply the blockface identification numbers to individual
 # addresses in the incident data.

 # dependencies
 library(dplyr)
 library(purrr)
 library(stringr)
 library(tidyr)

 # Expand one side of a blockface into its individual house numbers.
 #
 # x: a length-two vector (character or numeric) holding the low and high
 #    house numbers for one side of the street, in that order.
 #
 # Returns every other number starting at the first bound and moving toward
 # the second, so the parity (odd/even) of the first bound is preserved.
 # A reversed range (first bound larger than the second) is walked downward
 # instead of failing with "wrong sign in 'by' argument", and a missing or
 # non-numeric bound yields an empty numeric vector rather than an error.
 parse_range <- function(x){

   # convert item to numeric
   bounds <- as.numeric(x)
   low <- bounds[1]
   high <- bounds[2]

   # nothing to expand when either bound is missing
   if (is.na(low) || is.na(high)) {
     return(numeric(0))
   }

   # step toward the second bound so reversed ranges are still expanded
   step <- if (high >= low) 2 else -2

   # expand to every other value between the two bounds
   seq.int(from = low, to = high, by = step)

 }

 # sample block data: one row per blockface, giving the low/high house
 # numbers for the right and left sides of the street, the street name, and
 # whether the block is "marked"
 blocks <- tribble(
   ~bfId, ~rightLow, ~rightHigh, ~leftLow, ~leftHigh, ~street,   ~marked,
   1,     400,       498,        401,      499,       "Main St", TRUE,
   2,     500,       598,        501,      599,       "Main St", FALSE,
   3,     600,       698,        601,      699,       "Main St", FALSE,
   4,     700,       798,        701,      799,       "Main St", FALSE
 )

 # sample incident data: one row per call, identified by callId, with the
 # street address it was reported at, the date, and the call type
 incidents <- tribble(
   ~callId, ~address,      ~date,     ~call,
   101,     "424 Main St", "1/1/14",  "Graffiti",
   102,     "447 Main St", "3/6/14",  "Graffiti",
   103,     "504 Main St", "5/12/14", "Pothole",
   104,     "667 Main St", "4/19/14", "Lights Out",
   105,     "773 Main St", "2/12/14", "Vacant Building"
 )

 # convert ranges into individual records, right side of street
 #
 # The low/high bounds are handed to parse_range() directly instead of being
 # pasted into a "low-high" string and split apart again. The list-column is
 # named explicitly in unnest() because calling unnest() without columns is
 # deprecated in tidyr >= 1.0.0. The final select() fixes the column order,
 # which differs between tidyr versions after unnesting.
 right <- blocks %>%
   mutate(
     house = map2(
       .x = rightLow,
       .y = rightHigh,
       .f = function(low, high) parse_range(c(low, high))
     )
   ) %>%
   unnest(house) %>%
   select(bfId, house, street, marked)

 # convert ranges into individual records, left side of street
 #
 # The low/high bounds are handed to parse_range() directly instead of being
 # pasted into a "low-high" string and split apart again. The list-column is
 # named explicitly in unnest() because calling unnest() without columns is
 # deprecated in tidyr >= 1.0.0. The final select() fixes the column order,
 # which differs between tidyr versions after unnesting.
 left <- blocks %>%
   mutate(
     house = map2(
       .x = leftLow,
       .y = leftHigh,
       .f = function(low, high) parse_range(c(low, high))
     )
   ) %>%
   unnest(house) %>%
   select(bfId, house, street, marked)

 # combine left and right side of street data into one lookup table with a
 # full address string per house number
 #
 # house is a double, and pasting a double such as 100000 produces "1e+05",
 # which would never match an incident address. Converting to integer first
 # keeps the house number in plain digits (NA stays NA).
 master <- bind_rows(right, left) %>%
   arrange(bfId, house) %>%
   mutate(address = str_c(as.integer(house), street, sep = " "))

 # attach the blockface id and the "marked" indicator to each incident by
 # matching on the full address string; every incident row is kept, and
 # incidents whose address is not in master get NA for bfId and marked
 incidentsWithBlock <- incidents %>%
   left_join(select(master, bfId, address, marked), by = "address")

 # calculate counts per blockface
 #
 # Incidents are tallied by bfId and joined back onto the full block table so
 # that every blockface appears in the result. Blockfaces with no incidents
 # have no row in the tally and come out of the join as NA, so those are
 # replaced with an explicit count of zero.
 countsByBlock <- incidentsWithBlock %>%
   group_by(bfId) %>%
   summarise(count = n()) %>%
   left_join(blocks, ., by = "bfId") %>%
   mutate(count = replace(count, is.na(count), 0L))
	# Parse Census Bureau address range data

	# Problem:
	# We need a way to match incidents (that have address-level data) to the blockface they occur on.
	# By blockface, I mean the houses on either side of a street between two cross streets (i.e.
	# the 100-block of Main Street between 1st and 2nd Avenues).
	#
	# Typically, census block and city block shapefiles do not represent a blockface. Instead,
	# they have parts of up to four different streets, typically representing half of a blockface
	# for each of the included streets. So a block bounded by Main Street on the south, 1st Avenue
	# on the west, Washington Street on the north, and 2nd Avenue on the east would contain addresses
	# from each of the four streets that form the block's boundaries. Joining addresses to traditional
	# blocks therefore produces entities that are distinct from the way we think about a blockface.
	#
	# We need to be able to do two things - identify incidents that occur on "marked" blocks, and then
	# also produce counts of incidents for all blocks.
	#
	# The Census Bureau publishes something that could be helpful - line data with block segments.
	# These include the low and high address numbers for both sides of the street (i.e. the blockface).
	# However, I need a quick way to apply the blockface identification numbers to individual
	# addresses in the incident data.

	# dependencies
	library(dplyr)
	library(purrr)
	library(stringr)
	library(tidyr)

	# Expand one side of a blockface into its individual house numbers.
	#
	# x: a length-two vector (character or numeric) holding the low and high
	#    house numbers for one side of the street, in that order.
	#
	# Returns every other number starting at the first bound and moving toward
	# the second, so the parity (odd/even) of the first bound is preserved.
	# A reversed range (first bound larger than the second) is walked downward
	# instead of failing with "wrong sign in 'by' argument", and a missing or
	# non-numeric bound yields an empty numeric vector rather than an error.
	parse_range <- function(x){

	  # convert item to numeric
	  bounds <- as.numeric(x)
	  low <- bounds[1]
	  high <- bounds[2]

	  # nothing to expand when either bound is missing
	  if (is.na(low) || is.na(high)) {
	    return(numeric(0))
	  }

	  # step toward the second bound so reversed ranges are still expanded
	  step <- if (high >= low) 2 else -2

	  # expand to every other value between the two bounds
	  seq.int(from = low, to = high, by = step)

	}

	# sample block data: one row per blockface, giving the low/high house
	# numbers for the right and left sides of the street, the street name, and
	# whether the block is "marked"
	blocks <- tribble(
	  ~bfId, ~rightLow, ~rightHigh, ~leftLow, ~leftHigh, ~street,   ~marked,
	  1,     400,       498,        401,      499,       "Main St", TRUE,
	  2,     500,       598,        501,      599,       "Main St", FALSE,
	  3,     600,       698,        601,      699,       "Main St", FALSE,
	  4,     700,       798,        701,      799,       "Main St", FALSE
	)

	# sample incident data: one row per call, identified by callId, with the
	# street address it was reported at, the date, and the call type
	incidents <- tribble(
	  ~callId, ~address,      ~date,     ~call,
	  101,     "424 Main St", "1/1/14",  "Graffiti",
	  102,     "447 Main St", "3/6/14",  "Graffiti",
	  103,     "504 Main St", "5/12/14", "Pothole",
	  104,     "667 Main St", "4/19/14", "Lights Out",
	  105,     "773 Main St", "2/12/14", "Vacant Building"
	)

	# convert ranges into individual records, right side of street
	#
	# The low/high bounds are handed to parse_range() directly instead of being
	# pasted into a "low-high" string and split apart again. The list-column is
	# named explicitly in unnest() because calling unnest() without columns is
	# deprecated in tidyr >= 1.0.0. The final select() fixes the column order,
	# which differs between tidyr versions after unnesting.
	right <- blocks %>%
	  mutate(
	    house = map2(
	      .x = rightLow,
	      .y = rightHigh,
	      .f = function(low, high) parse_range(c(low, high))
	    )
	  ) %>%
	  unnest(house) %>%
	  select(bfId, house, street, marked)

	# convert ranges into individual records, left side of street
	#
	# The low/high bounds are handed to parse_range() directly instead of being
	# pasted into a "low-high" string and split apart again. The list-column is
	# named explicitly in unnest() because calling unnest() without columns is
	# deprecated in tidyr >= 1.0.0. The final select() fixes the column order,
	# which differs between tidyr versions after unnesting.
	left <- blocks %>%
	  mutate(
	    house = map2(
	      .x = leftLow,
	      .y = leftHigh,
	      .f = function(low, high) parse_range(c(low, high))
	    )
	  ) %>%
	  unnest(house) %>%
	  select(bfId, house, street, marked)

	# combine left and right side of street data into one lookup table with a
	# full address string per house number
	#
	# house is a double, and pasting a double such as 100000 produces "1e+05",
	# which would never match an incident address. Converting to integer first
	# keeps the house number in plain digits (NA stays NA).
	master <- bind_rows(right, left) %>%
	  arrange(bfId, house) %>%
	  mutate(address = str_c(as.integer(house), street, sep = " "))

	# attach the blockface id and the "marked" indicator to each incident by
	# matching on the full address string; every incident row is kept, and
	# incidents whose address is not in master get NA for bfId and marked
	incidentsWithBlock <- incidents %>%
	  left_join(select(master, bfId, address, marked), by = "address")

	# calculate counts per blockface
	#
	# Incidents are tallied by bfId and joined back onto the full block table so
	# that every blockface appears in the result. Blockfaces with no incidents
	# have no row in the tally and come out of the join as NA, so those are
	# replaced with an explicit count of zero.
	countsByBlock <- incidentsWithBlock %>%
	  group_by(bfId) %>%
	  summarise(count = n()) %>%
	  left_join(blocks, ., by = "bfId") %>%
	  mutate(count = replace(count, is.na(count), 0L))