Last active
February 18, 2017 07:51
-
-
Save obrl-soil/0c51f99558e2704d77b6786b92df0fd9 to your computer and use it in GitHub Desktop.
Identify Multipolygons in sp and sf class objects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### sp Polygon objects ### | |
# x = spatialPolygons or spatialPolygonsDataFrame object only | |
# by_id = where true, adds a column to @data with a part count for each polygon. Promotes sPolys to sPolydf. | |
find_mp <- function(x = NULL, id_rows = FALSE) { | |
nparts <- vector("list", length(x@polygons)) | |
for (i in seq_along(1:length(x@polygons))) { | |
n <- vector("list", length(x@polygons[[i]]@Polygons)) | |
for (j in seq_along(1:length(x@polygons[[i]]@Polygons))) { | |
n[[j]] <- x@polygons[[i]]@Polygons[[j]]@hole | |
} | |
nparts[[i]] <- sum(unlist(n) == FALSE) | |
} | |
PARTS <- unlist(nparts) | |
if (id_rows == FALSE){ | |
if(sum(PARTS) / nrow(x@data) == 1) { | |
return(FALSE) | |
} else { | |
return(TRUE) | |
} | |
} else if(class(x)[[1]] == 'SpatialPolygons') { | |
x <- as(x, 'SpatialPolygonsDataFrame') | |
x@data <- cbind(x@data, PARTS) | |
x@data$dummy <- NULL | |
} else { | |
x@data <- cbind(x@data, PARTS) | |
} | |
return(x) | |
} | |
### sp objects ### | |
# with thanks to @mdsumner for the fortify-based approach, this should work on point and line objects as well as polys | |
# x = sp object | |
# by_id = where true, adds a column to @data with a part count for each polygon. Promotes s* to s*df | |
find_mp_fort <- function(x = NULL, id_rows = FALSE) { | |
suppressMessages(xf <- fortify(x)) | |
nparts <- if (is.null(xf$holes)) { | |
xf %>% distinct(id, group) %>% | |
group_by(id) %>% | |
summarize(nparts = n()) | |
} else { | |
xf %>% distinct(id, group, hole) %>% | |
filter(hole == FALSE) %>% | |
group_by(id) %>% | |
summarise(nparts = n()) | |
} | |
if (id_rows == FALSE){ | |
if(sum(nparts$nparts) / nrow(nparts) == 1) { | |
return(FALSE) | |
} else { | |
return(TRUE) | |
} | |
} else if (class(x)[[1]] == 'SpatialPolygons') { | |
x <- as(x, 'SpatialPolygonsDataFrame') | |
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id', | |
all.x = TRUE, all.y = FALSE) | |
x@data$dummy <- NULL | |
} else if (class(x)[[1]] == 'SpatialLines') { | |
x <- as(x, 'SpatialLinesDataFrame') | |
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id', | |
all.x = TRUE, all.y = FALSE) | |
x@data$dummy <- NULL | |
} else if (class(x)[[1]] == 'SpatialPoints') { | |
x <- as(x, 'SpatialPointsDataFrame') | |
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id', | |
all.x = TRUE, all.y = FALSE) | |
x@data$dummy <- NULL | |
} else { | |
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id', | |
all.x = TRUE, all.y = FALSE) | |
} | |
x@data$Row.names <- NULL | |
return(x) | |
} | |
### sf objects ### | |
# x = sf object | |
# by_id = where true, adds a column reporting the geometry type for each row | |
# note that you must use st_read() with option promote_to_multi = FALSE if you want a sensible output where by_id = TRUE | |
st_findmp <- function(x = NULL, id_rows = FALSE) { | |
nparts_vec <- st_geometry_type(x$geometry) | |
if (id_rows == FALSE){ | |
return(as.character(unique(st_geometry_type(x)))) | |
} else { | |
x$nparts <- nparts_vec | |
return(x) | |
} | |
} |
A more formal treatment would go something like this:
count_paths <- function(x, ...) UseMethod("count_paths")
count_paths.MULTIPOLYGON <- function(x, ...) sum(unlist(lapply(x, length)))
count_paths.POLYGON <- function(x, ...) length(x)
count_paths.LINESTRING <- function(x, ...) 1L
count_paths.MULTILINESTRING <- function(x, ...) length(x)
count_paths.MULTIPOINT <- function(x, ...) nrow(x)
count_paths.POINT <- function(x, ...) 1L
count_paths.sfc <- function(x, ...) unlist(lapply(x, count_paths))
count_paths.sf <- function(x, ...) count_paths(st_geometry(x))
library(sf)
example(st_read)
count_paths(nc)
library(maptools)
data(wrld_simpl)
count_paths(st_as_sf(wrld_simpl))
We can even pipeline this
library(dplyr)
> st_as_sf(wrld_simpl) %>% mutate(np = count_paths(geometry)) %>% arrange(desc(np)) %>% filter(np > 200) %>% select(NAME,AREA)
Simple feature collection with 4 features and 2 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -180 ymin: -90 xmax: 180 ymax: 83.57027
epsg (SRID): 4326
proj4string: +proj=longlat +datum=WGS84 +no_defs
NAME AREA geometry
1 Canada 909351 MULTIPOLYGON(((-134.4955444...
2 Indonesia 181157 MULTIPOLYGON(((116.04942512...
3 United States 915896 MULTIPOLYGON(((-95.07806396...
4 Russia 1638094 MULTIPOLYGON(((104.26748847...
I'm using "path" to mean "any distinct part, like a linestring, island, hole, or point" - and treating a coordinate as a kind of degenerate go-nowhere "path". It makes sense in a more general context (trust me). Manifold GIS calls this a "branch", but there's no other consistent name anywhere afaik.
(thanks for the inspiration, feel free to agitate on details)
Awesome, thanks! My dplyr-fu is weak as yet. Fortify is a bit slower but does allow a function that'll work on point and line sp objects, see edits above
for sf, holes are matrices in the geometry list, and parts are sub-lists holding a matrix e.g.
x <- as.list(sf[i, 'geometry]) # what's that warning about anyway...
y <- sapply(x$geometry[[1]], function(x) class(x))
should make it easy to distinguish the two?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In sf it's a bit simpler, since the recursive lists are just lists:
But please note, we are not differentiating between holes and islands as parts here.