Created
April 3, 2019 12:45
-
-
Save obrl-soil/002659f46932602985c7de52f9b8c21c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# faffing about with methods to approximate a "lag vertex polygon" (Poore, 1986) | |
# or "best-fit regular geometric feature" (Stoddard, 1965) for polygon shape | |
# metric | |
# References: | |
# Poore 1986 - MSc Thesis on soil map quantification at | |
# https://ir.library.oregonstate.edu/downloads/zk51vk04q | |
# Stoddart, D. (1965). The shape of atolls. Marine Geology, 3(5), 369–383. | |
# doi:10.1016/0025-3227(65)90025-3 (its on scihub) | |
# also apparently Fridland 1972 "The pattern of the soil cover" but that's | |
# not online ;_; | |
# basically this is looking for a single-polygon simplification that is a) | |
# as close to n vertices as possible and b) as close to equidistant sides as | |
# possible. It would appear to assume a relatively high-detail polygon as input, | |
# with no islands and holes ignored if present. | |
library(sf) | |
library(tidyverse) | |
nc = st_read(system.file("shape/nc.shp", package="sf")) | |
nc1 <- nc[1, ] | |
nc1 <- st_transform(nc1, 3857) # shhh don't need max precision, just meters | |
st_cast(nc1, 'POINT') %>% nrow() # 27 points natively | |
# add points to a certain level of detail (0.5 km here) | |
nc1d <- st_segmentize(nc1, 500) | |
plot(st_cast(nc1d[0], 'POINT'), pch = 20, col = 'red') | |
# filter to get a set of points x dist apart (20 * 500 here - 10 km) | |
# but this is counted along the perimeter so the points aren't really | |
# equidistant as required. More problematic as shape gets more convoluted... | |
nc1_lp <- st_cast(nc1d, 'POINT') %>% | |
# may as well start from a random vertex: | |
filter((row_number() + sample(seq(nrow(.)), 1)) %% 20 == 0) | |
plot(nc1[0], reset = FALSE, border = 'grey60') | |
plot(nc1_lp[0], pch = 20, col = 'red', add = TRUE) | |
# surprisingly shape preserving although obs not foolproof | |
# defo only for simple polygons! try nc[4, ] haha | |
# back to poly with | |
lag_ish_poly <- st_coordinates(nc1_lp) %>% | |
rbind(., .[1, ]) %>% | |
list(.) %>% | |
st_polygon() | |
plot(lag_ish_poly, col = NA, border = 'red', add = TRUE) | |
# to get the lag sums after Stoddard's description: | |
## "The polygon thus formed can be described by summing the distances between | |
## all the vertices lag-one, lag-two,..., until all unique sums are determined; | |
## and by squaring and then summing the distances between all vertices lag-one, | |
## lag-two..., until all unique squared sums are determined. Each shape will | |
## then have a one-to-one correspondence with one of these sets of sums" | |
# and | |
## "Distances lag-one (S1) are given by AC, BD, CE,... HB, i.e., one vertex is | |
## skipped. Distances lag-two (S2) are given by AD, BE, CF:... HC; i.e., two | |
## vertices are skipped. Distances lag-three (S3) are given by AE, BF, CG,... | |
## HD; i.e., three vertices are skipped" | |
# Poore generalises this to (n-2/2) lag steps, where n is polygon vertices. This | |
# conveniently(?) gave him ten lag measurements for each 22-sided shape. | |
lagsums <- function(pts = NULL) { | |
nr <- nrow(pts) | |
lags <- map_dbl(seq(2, (nr - 2)/2), function(l) { | |
# subset in a loop | |
start <- pts[1, ] | |
mids <- pts[seq(nr) %% l == 0, ] | |
end <- pts[l, ] | |
lagset <- rbind(start, mids) | |
lagset1 <- rbind(mids, end) | |
lag_d <- as.numeric(st_distance(lagset, lagset1, by_element = TRUE)) | |
mean(lag_d) # gets around lack of perf. equal side dist | |
}) | |
matrix(c(lags, lags^2), ncol = 2, byrow = FALSE) | |
} | |
# anyway apparently this encodes the shape to a given precision... | |
lagsums(nc1_lp) | |
df <- as.data.frame(lagsums(nc1_lp)) %>% | |
mutate(LAG = seq(n()) + 1) | |
# but they don't plot to a nice smooth curve like the examples in Poore (p.37ish) | |
ggplot(df, aes(x = LAG, y = V1)) + | |
geom_line() + | |
theme_minimal() | |
# what happens if one segmentises and then runs ms_simplify, not worrying about | |
# edge length e.g. | |
nc1_mss <- | |
rmapshaper::ms_simplify(nc1d, | |
# gets as close to n vertices as possible: | |
keep = 1 / st_cast(nc1d, 'POINT') %>% nrow() * 25) %>% | |
st_cast(., 'POINT') | |
plot(nc1[0], reset = FALSE, border = 'grey60') | |
plot(nc1_mss[0], pch = 20, col = 'red', add = TRUE) | |
# messier! | |
lagsums(nc1_mss) | |
df2 <- as.data.frame(lagsums(nc1_mss)) %>% | |
mutate(LAG = seq(n()) + 1) | |
ggplot(df2, aes(x = LAG, y = V1)) + | |
geom_line() + | |
theme_minimal() | |
# anyway tbh there are simpler measures of shape/form/complexity but this | |
# is still intriguing |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's what I was thinking!
Created on 2019-04-04 by the reprex package (v0.2.1)