Last active
July 11, 2025 02:08
-
-
Save elipousson/892f70b1e5974db89b8eb3a6083b21a5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Build a GeocodeServer object for the Baltimore City EGIS Composite Locator
#'
#' @param url URL of the GeocodeServer. When `NULL` (the default), the
#'   Baltimore City EGIS Composite Locator endpoint is used.
#' @param token Token passed on to `arcgisgeocode::geocode_server()`. Defaults
#'   to the value of `arcgisutils::arc_token()`.
#' @return The value returned by `arcgisgeocode::geocode_server()`.
#' @noRd
egis_geocode_server <- function(
  url = NULL,
  token = arcgisutils::arc_token()
) {
  # Fall back to the city-wide composite locator when no URL is supplied
  if (is.null(url)) {
    url <- "https://egis.baltimorecity.gov/egis/rest/services/Locator/EGISCompositeLocator/GeocodeServer"
  }

  arcgisgeocode::geocode_server(url = url, token = token)
}
#' Geocode a batch of addresses with the Baltimore City EGIS Composite Locator
#'
#' Thin wrapper around `arcgisgeocode::geocode_addresses()` whose default
#' geocoder is the one created by `egis_geocode_server()`.
#'
#' @param single_line Passed as the `single_line` argument of
#'   `arcgisgeocode::geocode_addresses()`.
#' @param ... Further arguments forwarded unchanged to
#'   `arcgisgeocode::geocode_addresses()`.
#' @param geocoder Geocoder object to query. Defaults to
#'   `egis_geocode_server()`.
#' @return The value returned by `arcgisgeocode::geocode_addresses()`.
#' @noRd
baltimore_geocode_addresses <- function(
  single_line = NULL,
  ...,
  geocoder = egis_geocode_server()
) {
  arcgisgeocode::geocode_addresses(
    single_line = single_line, ..., geocoder = geocoder
  )
}
#' Scrub addresses for use with the EGIS Geocoder
#'
#' Normalises free-text addresses by applying, in order: newline replacement,
#' whitespace trimming, removal of `#` and commas, lower-casing, removal of a
#' country name, removal of trailing digits, removal of a trailing state and
#' city, removal of everything from the word "suite" onwards, and removal of
#' the first decimal part (e.g. "1.5" becomes "1").
#'
#' @param address A character vector of addresses.
#' @return A lower-cased character vector the same length as `address`.
#' @noRd
scrub_address <- function(address) {
  address |>
    # Replace new lines with spaces and then trim whitespace
    stringr::str_replace_all(
      c("\n" = " ", "\r" = " ")
    ) |>
    stringr::str_trim() |>
    # Remove hashtags and commas
    stringr::str_remove_all(
      "[#]|,"
    ) |>
    stringr::str_to_lower() |>
    # Remove a trailing "usa"/"us" or any occurrence of "united states".
    # The abbreviations are grouped and guarded by a word boundary so that
    # words that merely end in "us" (e.g. "columbus") are left intact.
    stringr::str_remove(
      "[:blank:]?\\b(usa|us)$|\\bunited states\\b[:blank:]?"
    ) |>
    # Remove trailing digits (likely zipcodes). This runs after the country
    # removal so a zipcode followed by a country is stripped as well.
    stringr::str_remove(
      "\\d+$"
    ) |>
    # Remove trailing state abbreviation or name
    stringr::str_remove(
      "[:blank:]?(md|maryland)[:blank:]?$"
    ) |>
    # Remove trailing city abbreviation or name
    stringr::str_remove(
      "[:blank:]?(balt|baltimore)[:blank:]?$"
    ) |>
    # Remove all text appearing after the word suite
    # NOTE: This is fairly aggressive
    stringr::str_remove(
      "[:blank:]?suite.*"
    ) |>
    # Remove decimals from addresses (first occurrence only)
    # TODO: Handle fractions and address ranges
    stringr::str_remove(
      "\\.\\d+"
    )
}
#' Match addresses to Baltimore City neighborhoods
#'
#' Scrubs the input with `scrub_address()`, geocodes it with
#' `baltimore_geocode_addresses()`, and spatially joins the result to the
#' Baltimore City neighborhoods layer and a Maryland county boundary layer.
#' If the EGIS geocoder returns only empty geometries, the original
#' (unscrubbed) addresses are geocoded with `tidygeocoder::geo()` instead.
#'
#' @param address A character vector of addresses.
#' @param crs Coordinate reference system used for geocoding, for reading the
#'   boundary layers, and for the returned geometry.
#' @param .progress Passed to the geocoders to toggle progress reporting.
#' @return An `sf` data frame with a leading `status` column followed by
#'   `input_address`, `scrubbed_address`, `output_address`, lon/lat columns
#'   `x` and `y`, `neighborhood`, and `county`. If the fallback geocoder does
#'   not return a data frame, a one-row data frame with
#'   `status = "Invalid input"` is returned instead.
#' @noRd
address_to_neighborhood <- function(
  address,
  crs = 3857,
  .progress = FALSE
) {
  stopifnot(is.character(address))

  scrubbed_address <- scrub_address(address)

  geocoded_address <- baltimore_geocode_addresses(
    scrubbed_address,
    crs = crs,
    .progress = .progress
  ) |>
    dplyr::mutate(
      input_address = address,
      scrubbed_address = scrubbed_address
    ) |>
    dplyr::select(
      input_address,
      scrubbed_address,
      output_address = match_addr
    )

  if (!all(sf::st_is_empty(geocoded_address))) {
    # Bind lon/lat coordinates
    lon_lat <- geocoded_address |>
      sf::st_transform(4326) |>
      sf::st_coordinates() |>
      as.data.frame() |>
      purrr::set_names(c("x", "y"))

    geocoded_address <- dplyr::bind_cols(geocoded_address, lon_lat)
  } else {
    # Use OSM Nominatim as alternate geocoder
    # FIXME: This could be improved to be used for just unmatched addresses
    # as is it only is used if the EGIS geocoder finds nothing
    nominatim_result <- tidygeocoder::geo(
      # Use original (not cleaned) address
      # Assumes the city and state are retained in the original input
      address,
      lat = "y",
      long = "x",
      full_results = TRUE,
      quiet = TRUE,
      progress_bar = .progress
    )

    if (!is.data.frame(nominatim_result)) {
      return(
        data.frame(
          status = "Invalid input"
        )
      )
    }

    geocoded_address <- nominatim_result |>
      # `scrubbed_address` is not a column of the geocoder result, so it has
      # to be added before it can be selected; naming the local vector
      # directly in select() would be interpreted as a set of column names.
      # Unused by Nominatim but maybe helpful for trouble-shooting
      dplyr::mutate(
        scrubbed_address = scrubbed_address
      ) |>
      dplyr::select(
        input_address = address,
        scrubbed_address,
        output_address = display_name,
        x,
        y
      ) |>
      sf::st_as_sf(
        coords = c("x", "y"),
        remove = FALSE,
        crs = 4326,
        # Unmatched addresses come back with missing coordinates; keep them
        # as rows instead of raising an error
        na.fail = FALSE
      ) |>
      sf::st_transform(crs = crs)
  }

  neighborhoods <- arcgislayers::arc_read(
    url = "https://geodata.baltimorecity.gov/egis/rest/services/CityView/Neighborhoods/FeatureServer/0",
    col_select = "Name",
    col_names = "neighborhood",
    crs = crs
  )

  counties <- arcgislayers::arc_read(
    url = "https://mdgeodata.md.gov/imap/rest/services/Boundaries/MD_PhysicalBoundaries/FeatureServer/1",
    col_select = "county",
    crs = crs
  )

  joined <- geocoded_address |>
    sf::st_join(neighborhoods) |>
    sf::st_join(counties)

  # Look the geometry up via st_geometry() so the status logic does not
  # depend on the geometry column being literally named "geometry"
  no_location <- sf::st_is_empty(sf::st_geometry(joined))

  dplyr::mutate(
    joined,
    status = dplyr::case_when(
      no_location ~ "Invalid input",
      !is.na(neighborhood) ~ "Matched to Baltimore City neighborhood",
      is.na(county) & !no_location ~ "Outside Maryland",
      county != "Baltimore City" & !no_location ~ "Outside Baltimore City",
      .default = NA_character_
    ),
    .before = tidyselect::everything()
  )
}
# Example calls; evaluated only when R is running interactively
if (rlang::is_interactive()) {
  # Plain street addresses
  address_to_neighborhood(
    c("1.5 N. Charles Street", "10 W. Read St", "3200 Greenmount Avenue")
  )

  # Place names, which OSM Nominatim supports
  address_to_neighborhood(
    c("Patterson Park, Baltimore, MD", "Baltimore City Hall, Maryland")
  )

  # A mixed batch: the invalid addresses aren't checked by Nominatim
  # (fixable, but not implemented)
  address_to_neighborhood(
    c("10 W. Read St", "Baltimore City Hall, Maryland")
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment