library(terra)
#> terra 1.7.78
We are exploring an approach in {geotargets} for splitting up a raster into tiles and then using that to split up rasters that we will then do tasks on. To do this, Eric wrote this create_tile_exts function, which is powered by terra::getTileExtents:
create_tile_exts <- function(raster, ncol, nrow) {
template <- terra::rast(
x = terra::ext(raster),
ncol = ncol,
nrow = nrow,
crs = terra::crs(raster)
)
tile_ext <- terra::getTileExtents(
x = raster,
template
)
n_tiles <- seq_len(nrow(tile_ext))
tile_list <- lapply(
n_tiles,
\(i) tile_ext[i,]
)
tile_list
}
To illustrate, this creates a list of extents for the raster. Here’s an example with a 4x4 raster
data_16 <- matrix(1:16, nrow = 4, ncol = 4, byrow = TRUE)
r_16 <- terra::rast(data_16)
r_16_tiles_2x2 <- create_tile_exts(r_16, ncol = 2, nrow = 2)
r_16_tiles_2x2
#> [[1]]
#> xmin xmax ymin ymax
#> 0 2 2 4
#>
#> [[2]]
#> xmin xmax ymin ymax
#> 2 4 2 4
#>
#> [[3]]
#> xmin xmax ymin ymax
#> 0 2 0 2
#>
#> [[4]]
#> xmin xmax ymin ymax
#> 2 4 0 2
plot_rast_tiles <- function(rast, tiles){
plot(rast)
invisible(lapply(
tiles,
\(x) plot(ext(x), add = TRUE, border = "hotpink")
))
}
plot_rast_tiles(r_16, r_16_tiles_2x2)
This works appropriately. However, when we provide a different tile extent, say a 3x3, we get something unexpected.
r_16_tiles_3x3 <- create_tile_exts(r_16, ncol = 3, nrow = 3)
r_16_tiles_3x3
#> [[1]]
#> xmin xmax ymin ymax
#> 0 1 3 4
#>
#> [[2]]
#> xmin xmax ymin ymax
#> 1 3 3 4
#>
#> [[3]]
#> xmin xmax ymin ymax
#> 3 4 3 4
#>
#> [[4]]
#> xmin xmax ymin ymax
#> 0 1 1 3
#>
#> [[5]]
#> xmin xmax ymin ymax
#> 1 3 1 3
#>
#> [[6]]
#> xmin xmax ymin ymax
#> 3 4 1 3
#>
#> [[7]]
#> xmin xmax ymin ymax
#> 0 1 0 1
#>
#> [[8]]
#> xmin xmax ymin ymax
#> 1 3 0 1
#>
#> [[9]]
#> xmin xmax ymin ymax
#> 3 4 0 1
plot_rast_tiles(r_16, r_16_tiles_3x3)
to demonstrate which tiles are which, here’s a plot
plot_rast_tile_colours <- function(rast, tile){
pal <- rainbow(n = length(tile))
plot(rast)
for (i in seq_len(length(tile))){
plot(ext(tile[[i]]), add = TRUE, border = pal[i], main = paste0(i))
}
}
plot_rast_tile_colours(r_16, r_16_tiles_2x2)
plot_rast_tile_colours(r_16, r_16_tiles_3x3)
There are some other strange use cases as well:
data_81 <- matrix(1:81, nrow = 9, ncol = 9, byrow = TRUE)
r_81 <- terra::rast(data_81)
r_81_tile_2x2 <- create_tile_exts(r_81, ncol = 2, nrow = 2)
r_81_tile_2x2
#> [[1]]
#> xmin xmax ymin ymax
#> 0 5 5 9
#>
#> [[2]]
#> xmin xmax ymin ymax
#> 5 9 5 9
#>
#> [[3]]
#> xmin xmax ymin ymax
#> 0 5 0 5
#>
#> [[4]]
#> xmin xmax ymin ymax
#> 5 9 0 5
plot_rast_tiles(r_81, r_81_tile_2x2)
plot_rast_tile_colours(r_81, r_81_tile_2x2)
using 3x3 is good:
r_81_tiles_3x3 <- create_tile_exts(r_81, ncol = 3, nrow = 3)
plot_rast_tiles(r_81, r_81_tiles_3x3)
plot_rast_tile_colours(r_81, r_81_tiles_3x3)
using 4x4 is bad:
r_81_tiles_4x4 <- create_tile_exts(r_81, ncol = 4, nrow = 4)
plot_rast_tiles(r_81, r_81_tiles_4x4)
plot_rast_tile_colours(r_81, r_81_tiles_4x4)
To help demonstrate how these are being written, here’s a little gif generator
plot_rast_tile_index <- function(rast, tile, index){
pal <- rainbow(n = length(tile))
plot(rast)
plot(ext(tile[[index]]), add = TRUE, border = pal[index], main = paste0(index))
}
animate_plot_rast <- function(rast, tiles, gifname){
tile_iterator <- seq_len(length(tiles))
filenames <- glue::glue("tile-example-{tile_iterator}.png")
for (i in tile_iterator){
png(filenames[i])
plot_rast_tile_index(rast, tiles, i)
dev.off()
}
png_files <- filenames
gif_file_name <- glue::glue("{gifname}.gif")
gif_file <- gifski::gifski(png_files = png_files, gif_file = gif_file_name)
unlink(png_files)
}
# not run with reprex as this writes to file!
# animate_plot_rast(r_81, r_81_tiles_4x4, "81-4x4")
# animate_plot_rast(r_81, r_81_tile_2x2, "81-2x2")
# animate_plot_rast(r_81, r_81_tile_2x2, "81-3x3")
# animate_plot_rast(r_16, r_16_tiles_2x2, "16-2x2")
# animate_plot_rast(r_16, r_16_tiles_3x3, "16-3x3")
Finally there’s a strange case with evel.tif where we get some overlap:
f <- system.file("ex/elev.tif", package="terra")
r <- terra::rast(f)
plot(r)
fileBlocksize(r)
#> rows cols
#> [1,] 43 95
dim(r)
#> [1] 90 95 1
r_tiles_2x2 <- create_tile_exts(r, ncol = 2, nrow = 2)
plot_rast_tiles(r, r_tiles_2x2)
Created on 2024-07-18 with reprex v2.1.0
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.4.0 (2024-04-24)
#> os macOS Sonoma 14.5
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Australia/Hobart
#> date 2024-07-18
#> pandoc 3.2.1 @ /opt/homebrew/bin/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> cli 3.6.3 2024-06-21 [1] CRAN (R 4.4.0)
#> codetools 0.2-20 2024-03-31 [2] CRAN (R 4.4.0)
#> curl 5.2.1 2024-03-01 [1] CRAN (R 4.4.0)
#> digest 0.6.36 2024-06-23 [1] CRAN (R 4.4.0)
#> evaluate 0.24.0 2024-06-10 [1] CRAN (R 4.4.0)
#> fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.4.0)
#> fs 1.6.4.9000 2024-06-26 [1] Github (r-lib/fs@714990b)
#> glue 1.7.0 2024-01-09 [1] CRAN (R 4.4.0)
#> highr 0.11 2024-05-26 [1] CRAN (R 4.4.0)
#> htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
#> knitr 1.48 2024-07-07 [1] CRAN (R 4.4.0)
#> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.4.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.4.0)
#> purrr 1.0.2 2023-08-10 [1] CRAN (R 4.4.0)
#> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.4.0)
#> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.4.0)
#> R.oo 1.26.0 2024-01-24 [1] CRAN (R 4.4.0)
#> R.utils 2.12.3 2023-11-18 [1] CRAN (R 4.4.0)
#> Rcpp 1.0.12 2024-01-09 [1] CRAN (R 4.4.0)
#> reprex 2.1.0 2024-01-11 [1] CRAN (R 4.4.0)
#> rlang 1.1.4 2024-06-04 [1] CRAN (R 4.4.0)
#> rmarkdown 2.27 2024-05-17 [1] CRAN (R 4.4.0)
#> rstudioapi 0.16.0 2024-03-24 [1] CRAN (R 4.4.0)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.4.0)
#> styler 1.10.3 2024-04-07 [1] CRAN (R 4.4.0)
#> terra * 1.7-78 2024-05-22 [1] CRAN (R 4.4.0)
#> vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.4.0)
#> withr 3.0.0 2024-01-16 [1] CRAN (R 4.4.0)
#> xfun 0.45 2024-06-16 [1] CRAN (R 4.4.0)
#> xml2 1.3.6 2023-12-04 [1] CRAN (R 4.4.0)
#> yaml 2.3.9 2024-07-05 [1] CRAN (R 4.4.0)
#>
#> [1] /Users/nick/Library/R/arm64/4.4/library
#> [2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────
I think terra might be doing ok here, the weird thing is the idea of giving a raster that represents the tiling - when, it can't subdivide 4x4 into parts that have a 3x3 structure. In that first example, the best you can do is 2x2 within that set of 4x4 and it centres one and then has clipped tiles around it. I still think that's worth exploring, but I think terra has its own logic here when you give y = SpatRast. What you really should do, same as with st_tile is set the dimension of each tile. So, using a helper from vaster to plot:
that's unproblematic, there's no dangle.
But now with 3x3,
we start to get dangle, there's one whole 3x3 (aligned to top left), and then there's a 3x1 and 1x3 and a 1x1 - these are remnants of an overall tiling that is broader than the domain - how should that broader context get clipped? I think terra puts that responsibility back on the input when not using an i x j tile dimension.
Note that in terms of the broader domain, the arithmetic "grid logic" for tiling and rasters generally, does extend beyond a given extent (that's why it's stored as an affine transform "offset,scale" in that abstract way, because the math is most general that way and it's the job of software to clip back to a defined extent - this was discussed here in gdalraster: USDAForestService/gdalraster#339 (comment) and I only just realized the connection).
If you gave terra a raster that actually had the properties of that tiling, it would give out the right extents - it would clip the lower/right 3 tiles appropriately to the extent of the x raster, but you had to do the external alignment yourself. With just a number, it chooses top-left alignment and expected dangle, same as stars does, and same as GDAL would.
BTW, using a tile size (e.g. 256x256) is probably the right model, rather than "break this into n pieces", because what you want is a rough size of task for each worker, how much memory does a worker have and what's an efficient way to spread jobs then. Also relative to number of bands and the actual data type ( and, a bit of point for discussion!).
The logic behind the more centred way, where tiles get clipped in different ways makes sense from the point of view of making the most of "internal data" that is buffered away from edge effects. (That's my theory). Where there's a weird overlap - the last example in Nick's code with elev.tif - that's a different problem - I expect that's just a rounding case that terra is missing - I'll keep exploring!