Last active
April 23, 2020 00:18
-
-
Save aejolene/f6fc2053dacb47463ae262bf245ef3c2 to your computer and use it in GitHub Desktop.
A rough script to pull geospatial data on COVID-19 cases by county from a Virginia Department of Health ArcGIS feature Service with R.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################# | |
#This is a very rough script to pull COVID-19 geospatial data from the VDH feature service. | |
# 2020-04-22 | |
################# | |
### Uncomment these if you need to install the packages for the first time | |
## install.packages(c("dplyr", "geojsonsf", "lubridate", "rmapshaper", "sf", "devtools")) | |
## library(devtools) | |
## install_github("yonghah/esri2sf") | |
library(dplyr) | |
library(esri2sf) | |
library(sf) | |
library(geojsonsf) | |
library(lubridate) | |
library(rmapshaper) | |
# The data is available at https://services5.arcgis.com/8CMfd3uYk8d4J22E/ArcGIS/rest/services/VDH_COVID19_Cases_by_Cnty/FeatureServer | |
# Here's the url to the layer containing county cases that we will pull. | |
covid_url <- "https://services5.arcgis.com/8CMfd3uYk8d4J22E/ArcGIS/rest/services/VDH_COVID19_Cases_by_Cnty/FeatureServer/0" | |
# Pull from the feature service into an sf object in R. This took a few minutes for me. | |
covid_sf <- esri2sf(covid_url) | |
# Check to see if it plots | |
plot(st_geometry(covid_sf)) | |
# That took a while to plot for me; I can tell that this is a big dataset. | |
# Get a look at the attributes | |
glimpse(covid_sf) | |
## The date comes out of the feature service in milliseconds (as a double). We'll need to convert that. | |
## This function converts milliseconds to date. Someone else wrote it. | |
ms_to_date = function(ms, t0="1970-01-01", timezone) { | |
## @ms: a numeric vector of milliseconds (big integers of 13 digits) | |
## @t0: a string of the format "yyyy-mm-dd", specifying the date that | |
## corresponds to 0 millisecond | |
## @timezone: a string specifying a timezone that can be recognized by R | |
## return: a POSIXct vector representing calendar dates and times | |
sec = ms / 1000 | |
as.POSIXct(sec, origin=t0, tz=timezone) | |
} | |
## This reformats back into ISO 8601 dates | |
covid_sf <- covid_sf %>% | |
mutate(Report_Date = ms_to_date(Report_Date, timezone="America/New_York")) | |
# Convert it to geojson | |
covid_geojson <- sf_geojson(covid_sf) | |
# This dataset very big because the county boundaries provided are quite detailed. I simplified with mapshaper. | |
# You can adjust simplification parameters but I just left the defaults. They can probably be simplified | |
# even further for a graphic map or a web map. | |
covid_simp <- covid_sf %>% | |
ms_simplify() | |
## Much bettter, down to about 7 Mb from 190 | |
#here's a simplified geojson object | |
covid_geojson_simp <- sf_geojson(covid_simp) | |
# write the geojson to a text file if desired | |
write(covid_geojson_simp, "va_covid_2020-04-22.json") | |
# write the sf object to a shapefile if desired | |
st_write(covid_simp, "va_covid_2020-04-22.shp") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment