Skip to content

Instantly share code, notes, and snippets.

@elipousson
Last active July 11, 2025 02:08
Show Gist options
  • Save elipousson/892f70b1e5974db89b8eb3a6083b21a5 to your computer and use it in GitHub Desktop.
Save elipousson/892f70b1e5974db89b8eb3a6083b21a5 to your computer and use it in GitHub Desktop.
#' Create a GeocodeServer using the Baltimore City EGIS Composite Locator
#'
#' @param url URL of the GeocodeServer. When `NULL` (the default), the
#'   Baltimore City EGIS Composite Locator URL is used.
#' @param token Token forwarded to [arcgisgeocode::geocode_server()].
#'   Defaults to [arcgisutils::arc_token()].
#' @return The value returned by [arcgisgeocode::geocode_server()].
#' @noRd
egis_geocode_server <- function(
  url = NULL,
  token = arcgisutils::arc_token()
) {
  # Fall back to the EGIS Composite Locator when no URL is supplied
  if (is.null(url)) {
    url <- "https://egis.baltimorecity.gov/egis/rest/services/Locator/EGISCompositeLocator/GeocodeServer"
  }

  arcgisgeocode::geocode_server(url = url, token = token)
}
#' Batch geocode addresses with the Baltimore City EGIS Composite Locator
#'
#' Thin wrapper around [arcgisgeocode::geocode_addresses()] whose `geocoder`
#' defaults to the server created by `egis_geocode_server()`.
#'
#' @param single_line Forwarded as the `single_line` argument of
#'   [arcgisgeocode::geocode_addresses()].
#' @param ... Additional arguments forwarded unchanged to
#'   [arcgisgeocode::geocode_addresses()].
#' @param geocoder Geocoder forwarded as the `geocoder` argument. Defaults to
#'   `egis_geocode_server()`.
#' @return The value returned by [arcgisgeocode::geocode_addresses()].
#' @noRd
baltimore_geocode_addresses <- function(
  single_line = NULL,
  ...,
  geocoder = egis_geocode_server()
) {
  arcgisgeocode::geocode_addresses(
    single_line = single_line,
    ...,
    geocoder = geocoder
  )
}
#' Scrub addresses for use with the EGIS Geocoder
#'
#' Normalizes free-text addresses before geocoding: line breaks become
#' spaces, hashes and commas are dropped, text is lower-cased, and a trailing
#' ZIP code, country, state ("md"/"maryland"), and city ("balt"/"baltimore")
#' are stripped. Everything from the word "suite" onward and the first
#' decimal fraction (e.g. the ".5" in "1.5") are removed as well.
#'
#' @param address Character vector of addresses.
#' @return A lower-cased character vector of scrubbed addresses, the same
#'   length as `address`.
scrub_address <- function(address) {
  address |>
    # Replace new lines with spaces and then trim whitespace
    stringr::str_replace_all(
      c("\n" = " ", "\r" = " ")
    ) |>
    stringr::str_trim() |>
    # Remove hashtags, commas, and trailing digits (likely zipcodes)
    stringr::str_remove_all(
      "[#]|,|(\\d+$)"
    ) |>
    stringr::str_to_lower() |>
    # Remove a trailing "usa"/"us" or any "united states". The abbreviations
    # are grouped and word-bounded so that a string merely ending in the
    # letters "us" (e.g. "columbus") is left intact.
    stringr::str_remove(
      "[:blank:]?\\b(usa|us)[:blank:]?$|\\bunited states\\b[:blank:]?"
    ) |>
    # A ZIP code that preceded the country was not trailing during the first
    # digit pass; remove it now so the state and city patterns can still
    # anchor to the end of the string.
    stringr::str_remove(
      "\\b\\d{5}(-\\d{4})?[:blank:]?$"
    ) |>
    # Remove trailing state abbreviation or name (whole word only)
    stringr::str_remove(
      "[:blank:]?\\b(md|maryland)[:blank:]?$"
    ) |>
    # Remove trailing city abbreviation or name (whole word only, so a
    # string ending in e.g. "cobalt" is not truncated)
    stringr::str_remove(
      "[:blank:]?\\b(balt|baltimore)[:blank:]?$"
    ) |>
    # Remove all text appearing after the word suite
    # NOTE: This is fairly aggressive
    stringr::str_remove(
      "[:blank:]?\\bsuite.*"
    ) |>
    # Remove decimals from addresses
    # TODO: Handle fractions and address ranges
    stringr::str_remove(
      "\\.\\d+"
    ) |>
    # Drop whitespace left behind by the removals above
    stringr::str_trim()
}
#' Match addresses to Baltimore City neighborhoods
#'
#' Scrubs the input with `scrub_address()`, geocodes it with
#' `baltimore_geocode_addresses()`, and spatially joins the resulting points
#' to the Baltimore City neighborhoods layer and the Maryland county
#' boundaries layer. If the EGIS geocoder returns only empty geometries, the
#' original (unscrubbed) addresses are geocoded with [tidygeocoder::geo()]
#' instead.
#'
#' @param address Character vector of addresses.
#' @param crs Coordinate reference system used for the geocoded points and
#'   for both boundary layers. Defaults to `3857`.
#' @param .progress Whether to show progress while geocoding; forwarded to
#'   the geocoder in use.
#' @return An sf data frame with a leading `status` column followed by
#'   `input_address`, `scrubbed_address`, `output_address`, `x`/`y`
#'   (coordinates in EPSG:4326), and the joined `neighborhood` and `county`
#'   columns. If the fallback geocoder does not return a data frame, a plain
#'   one-row data frame with `status = "Invalid input"` is returned instead.
address_to_neighborhood <- function(
  address,
  crs = 3857,
  .progress = FALSE
) {
  stopifnot(is.character(address))

  scrubbed_address <- scrub_address(address)

  geocoded_address <- baltimore_geocode_addresses(
    scrubbed_address,
    crs = crs,
    .progress = .progress
  ) |>
    dplyr::mutate(
      input_address = address,
      scrubbed_address = scrubbed_address
    ) |>
    dplyr::select(
      input_address,
      scrubbed_address,
      output_address = match_addr
    )

  if (!all(sf::st_is_empty(geocoded_address))) {
    # Bind lon/lat coordinates
    lon_lat <- geocoded_address |>
      sf::st_transform(4326) |>
      sf::st_coordinates() |>
      as.data.frame() |>
      purrr::set_names(c("x", "y"))

    geocoded_address <- dplyr::bind_cols(geocoded_address, lon_lat)
  } else {
    # Use OSM Nominatim as alternate geocoder
    # FIXME: This could be improved to be used for just unmatched addresses
    # as is, it is only used if the EGIS geocoder finds nothing
    fallback <- tidygeocoder::geo(
      # Use original (not cleaned) address
      # Assumes the city and state are retained in the original input
      address,
      lat = "y",
      long = "x",
      full_results = TRUE,
      quiet = TRUE,
      progress_bar = .progress
    )

    if (!is.data.frame(fallback)) {
      # error?
      return(
        data.frame(
          status = "Invalid input"
        )
      )
    }

    # NOTE(review): `display_name` is presumably absent when no address is
    # matched at all; add it as NA so the selection below cannot fail.
    if (!"display_name" %in% names(fallback)) {
      fallback[["display_name"]] <- NA_character_
    }

    geocoded_address <- fallback |>
      # `scrubbed_address` is a local vector, not a column of the result, so
      # it must be added with mutate(); referencing it inside select() makes
      # tidyselect look for columns named after its values.
      # Unused by Nominatim but maybe helpful for trouble-shooting
      dplyr::mutate(
        scrubbed_address = scrubbed_address
      ) |>
      dplyr::select(
        input_address = address,
        scrubbed_address,
        output_address = display_name,
        x,
        y
      ) |>
      sf::st_as_sf(
        coords = c("x", "y"),
        remove = FALSE,
        # Unmatched addresses have NA coordinates; keep them as empty
        # geometries so they are reported as "Invalid input" below.
        na.fail = FALSE,
        crs = 4326
      ) |>
      sf::st_transform(crs = crs)
  }

  neighborhoods <- arcgislayers::arc_read(
    url = "https://geodata.baltimorecity.gov/egis/rest/services/CityView/Neighborhoods/FeatureServer/0",
    col_select = "Name",
    col_names = "neighborhood",
    crs = crs
  )

  counties <- arcgislayers::arc_read(
    url = "https://mdgeodata.md.gov/imap/rest/services/Boundaries/MD_PhysicalBoundaries/FeatureServer/1",
    col_select = "county",
    crs = crs
  )

  geocoded_address |>
    sf::st_join(
      neighborhoods
    ) |>
    sf::st_join(
      counties
    ) |>
    dplyr::mutate(
      # Conditions are evaluated in order; the first match wins
      status = dplyr::case_when(
        sf::st_is_empty(geometry) ~ "Invalid input",
        !is.na(neighborhood) ~ "Matched to Baltimore City neighborhood",
        is.na(county) & !sf::st_is_empty(geometry) ~ "Outside Maryland",
        county != "Baltimore City" & !sf::st_is_empty(geometry) ~ "Outside Baltimore City",
        .default = NA_character_
      ),
      .before = tidyselect::everything()
    )
}
# Example usage ----
# Only runs when the session is interactive
if (rlang::is_interactive()) {
  # Street addresses handled by the EGIS geocoder
  address_to_neighborhood(
    c(
      "1.5 N. Charles Street",
      "10 W. Read St",
      "3200 Greenmount Avenue"
    )
  )

  # Place names are supported by OSM Nominatim
  address_to_neighborhood(
    c(
      "Patterson Park, Baltimore, MD",
      "Baltimore City Hall, Maryland"
    )
  )

  # When both kinds are mixed, addresses the EGIS geocoder cannot match are
  # not re-checked with Nominatim. This is fixable but not implemented.
  address_to_neighborhood(
    c(
      "10 W. Read St",
      "Baltimore City Hall, Maryland"
    )
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment