-
-
Save obrl-soil/0c51f99558e2704d77b6786b92df0fd9 to your computer and use it in GitHub Desktop.
### sp Polygon objects ### | |
# x = spatialPolygons or spatialPolygonsDataFrame object only | |
# by_id = where true, adds a column to @data with a part count for each polygon. Promotes sPolys to sPolydf. | |
find_mp <- function(x = NULL, id_rows = FALSE) { | |
nparts <- vector("list", length(x@polygons)) | |
for (i in seq_along(1:length(x@polygons))) { | |
n <- vector("list", length(x@polygons[[i]]@Polygons)) | |
for (j in seq_along(1:length(x@polygons[[i]]@Polygons))) { | |
n[[j]] <- x@polygons[[i]]@Polygons[[j]]@hole | |
} | |
nparts[[i]] <- sum(unlist(n) == FALSE) | |
} | |
PARTS <- unlist(nparts) | |
if (id_rows == FALSE){ | |
if(sum(PARTS) / nrow(x@data) == 1) { | |
return(FALSE) | |
} else { | |
return(TRUE) | |
} | |
} else if(class(x)[[1]] == 'SpatialPolygons') { | |
x <- as(x, 'SpatialPolygonsDataFrame') | |
x@data <- cbind(x@data, PARTS) | |
x@data$dummy <- NULL | |
} else { | |
x@data <- cbind(x@data, PARTS) | |
} | |
return(x) | |
} | |
### sp objects ### | |
# with thanks to @mdsumner for the fortify-based approach, this should work on point and line objects as well as polys | |
# x = sp object | |
# by_id = where true, adds a column to @data with a part count for each polygon. Promotes s* to s*df | |
find_mp_fort <- function(x = NULL, id_rows = FALSE) { | |
suppressMessages(xf <- fortify(x)) | |
nparts <- if (is.null(xf$holes)) { | |
xf %>% distinct(id, group) %>% | |
group_by(id) %>% | |
summarize(nparts = n()) | |
} else { | |
xf %>% distinct(id, group, hole) %>% | |
filter(hole == FALSE) %>% | |
group_by(id) %>% | |
summarise(nparts = n()) | |
} | |
if (id_rows == FALSE){ | |
if(sum(nparts$nparts) / nrow(nparts) == 1) { | |
return(FALSE) | |
} else { | |
return(TRUE) | |
} | |
} else if (class(x)[[1]] == 'SpatialPolygons') { | |
x <- as(x, 'SpatialPolygonsDataFrame') | |
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id', | |
all.x = TRUE, all.y = FALSE) | |
x@data$dummy <- NULL | |
} else if (class(x)[[1]] == 'SpatialLines') { | |
x <- as(x, 'SpatialLinesDataFrame') | |
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id', | |
all.x = TRUE, all.y = FALSE) | |
x@data$dummy <- NULL | |
} else if (class(x)[[1]] == 'SpatialPoints') { | |
x <- as(x, 'SpatialPointsDataFrame') | |
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id', | |
all.x = TRUE, all.y = FALSE) | |
x@data$dummy <- NULL | |
} else { | |
x@data <- merge(x@data, nparts, by.x = 0, by.y = 'id', | |
all.x = TRUE, all.y = FALSE) | |
} | |
x@data$Row.names <- NULL | |
return(x) | |
} | |
### sf objects ### | |
# x = sf object | |
# by_id = where true, adds a column reporting the geometry type for each row | |
# note that you must use st_read() with option promote_to_multi = FALSE if you want a sensible output where by_id = TRUE | |
st_findmp <- function(x = NULL, id_rows = FALSE) { | |
nparts_vec <- st_geometry_type(x$geometry) | |
if (id_rows == FALSE){ | |
return(as.character(unique(st_geometry_type(x)))) | |
} else { | |
x$nparts <- nparts_vec | |
return(x) | |
} | |
} |
In sf it's a bit simpler, since the recursive lists are just lists:
wrld_simpl$nparts <- unlist(lapply(st_geometry(sf::st_as_sf(wrld_simpl)), length))
subset(wrld_simpl, nparts > 1)
But please note, we are not differentiating between holes and islands as parts here.
A more formal treatment would go something like this:
count_paths <- function(x, ...) UseMethod("count_paths")
count_paths.MULTIPOLYGON <- function(x, ...) sum(unlist(lapply(x, length)))
count_paths.POLYGON <- function(x, ...) length(x)
count_paths.LINESTRING <- function(x, ...) 1L
count_paths.MULTILINESTRING <- function(x, ...) length(x)
count_paths.MULTIPOINT <- function(x, ...) nrow(x)
count_paths.POINT <- function(x, ...) 1L
count_paths.sfc <- function(x, ...) unlist(lapply(x, count_paths))
count_paths.sf <- function(x, ...) count_paths(st_geometry(x))
library(sf)
example(st_read)
count_paths(nc)
library(maptools)
data(wrld_simpl)
count_paths(st_as_sf(wrld_simpl))
We can even pipeline this
library(dplyr)
> st_as_sf(wrld_simpl) %>% mutate(np = count_paths(geometry)) %>% arrange(desc(np)) %>% filter(np > 200) %>% select(NAME,AREA)
Simple feature collection with 4 features and 2 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -180 ymin: -90 xmax: 180 ymax: 83.57027
epsg (SRID): 4326
proj4string: +proj=longlat +datum=WGS84 +no_defs
NAME AREA geometry
1 Canada 909351 MULTIPOLYGON(((-134.4955444...
2 Indonesia 181157 MULTIPOLYGON(((116.04942512...
3 United States 915896 MULTIPOLYGON(((-95.07806396...
4 Russia 1638094 MULTIPOLYGON(((104.26748847...
I'm using "path" to mean "any distinct part, like a linestring, island, hole, or point" - and treating a coordinate as a kind of degenerate go-nowhere "path". It makes sense in a more general context (trust me). Manifold GIS calls this a "branch", but there's no other consistent name anywhere afaik.
(thanks for the inspiration, feel free to agitate on details)
Awesome, thanks! My dplyr-fu is weak as yet. Fortify is a bit slower but does allow a function that'll work on point and line sp objects, see edits above
for sf, holes are matrices in the geometry list, and parts are sub-lists holding a matrix e.g.
x <- as.list(sf[i, 'geometry]) # what's that warning about anyway...
y <- sapply(x$geometry[[1]], function(x) class(x))
should make it easy to distinguish the two?
Can use fortify and then standard dplyr tools: