Skip to content

Instantly share code, notes, and snippets.

@njtierney
Last active July 18, 2024 01:19
Show Gist options
  • Save njtierney/9066bc08ec7968cc3149fc7e266420b7 to your computer and use it in GitHub Desktop.
Save njtierney/9066bc08ec7968cc3149fc7e266420b7 to your computer and use it in GitHub Desktop.
library(terra)
#> terra 1.7.78

We are exploring an approach in {geotargets} for splitting up a raster into tiles and then using that to split up rasters that we will then do tasks on. To do this, Eric wrote this create_tile_exts function, which is powered by terra::getTileExtents:

create_tile_exts <- function(raster, ncol, nrow) {
  template <- terra::rast(
    x = terra::ext(raster),
    ncol = ncol,
    nrow = nrow,
    crs = terra::crs(raster)
  )
  tile_ext <- terra::getTileExtents(
    x = raster,
    template
  )
  n_tiles <- seq_len(nrow(tile_ext))
  tile_list <- lapply(
    n_tiles,
    \(i) tile_ext[i,]
  )
  tile_list
}

To illustrate, this creates a list of extents for the raster. Here’s an example with a 4x4 raster

data_16 <- matrix(1:16, nrow = 4, ncol = 4, byrow = TRUE)
r_16 <- terra::rast(data_16)
r_16_tiles_2x2 <- create_tile_exts(r_16, ncol = 2, nrow = 2)
r_16_tiles_2x2
#> [[1]]
#> xmin xmax ymin ymax 
#>    0    2    2    4 
#> 
#> [[2]]
#> xmin xmax ymin ymax 
#>    2    4    2    4 
#> 
#> [[3]]
#> xmin xmax ymin ymax 
#>    0    2    0    2 
#> 
#> [[4]]
#> xmin xmax ymin ymax 
#>    2    4    0    2

plot_rast_tiles <- function(rast, tiles){
  plot(rast)
  invisible(lapply(
    tiles, 
    \(x) plot(ext(x), add = TRUE, border = "hotpink")
  ))
}

plot_rast_tiles(r_16, r_16_tiles_2x2)

This works appropriately. However, when we provide a different tile extent, say a 3x3, we get something unexpected.

r_16_tiles_3x3 <- create_tile_exts(r_16, ncol = 3, nrow = 3)
r_16_tiles_3x3
#> [[1]]
#> xmin xmax ymin ymax 
#>    0    1    3    4 
#> 
#> [[2]]
#> xmin xmax ymin ymax 
#>    1    3    3    4 
#> 
#> [[3]]
#> xmin xmax ymin ymax 
#>    3    4    3    4 
#> 
#> [[4]]
#> xmin xmax ymin ymax 
#>    0    1    1    3 
#> 
#> [[5]]
#> xmin xmax ymin ymax 
#>    1    3    1    3 
#> 
#> [[6]]
#> xmin xmax ymin ymax 
#>    3    4    1    3 
#> 
#> [[7]]
#> xmin xmax ymin ymax 
#>    0    1    0    1 
#> 
#> [[8]]
#> xmin xmax ymin ymax 
#>    1    3    0    1 
#> 
#> [[9]]
#> xmin xmax ymin ymax 
#>    3    4    0    1

plot_rast_tiles(r_16, r_16_tiles_3x3)

to demonstrate which tiles are which, here’s a plot

plot_rast_tile_colours <- function(rast, tile){
  pal <- rainbow(n = length(tile))
  plot(rast)
  for (i in seq_len(length(tile))){
    plot(ext(tile[[i]]), add = TRUE, border = pal[i], main = paste0(i))
  }
}

plot_rast_tile_colours(r_16, r_16_tiles_2x2)

plot_rast_tile_colours(r_16, r_16_tiles_3x3)

There are some other strange use cases as well:

data_81 <- matrix(1:81, nrow = 9, ncol = 9, byrow = TRUE)
r_81 <- terra::rast(data_81)
r_81_tile_2x2 <- create_tile_exts(r_81, ncol = 2, nrow = 2)
r_81_tile_2x2
#> [[1]]
#> xmin xmax ymin ymax 
#>    0    5    5    9 
#> 
#> [[2]]
#> xmin xmax ymin ymax 
#>    5    9    5    9 
#> 
#> [[3]]
#> xmin xmax ymin ymax 
#>    0    5    0    5 
#> 
#> [[4]]
#> xmin xmax ymin ymax 
#>    5    9    0    5
plot_rast_tiles(r_81, r_81_tile_2x2)

plot_rast_tile_colours(r_81, r_81_tile_2x2)

using 3x3 is good:

r_81_tiles_3x3 <- create_tile_exts(r_81, ncol = 3, nrow = 3)
plot_rast_tiles(r_81, r_81_tiles_3x3)

plot_rast_tile_colours(r_81, r_81_tiles_3x3)

using 4x4 is bad:

r_81_tiles_4x4 <- create_tile_exts(r_81, ncol = 4, nrow = 4)

plot_rast_tiles(r_81, r_81_tiles_4x4)

plot_rast_tile_colours(r_81, r_81_tiles_4x4)

To help demonstrate how these are being written, here’s a little gif generator

plot_rast_tile_index <- function(rast, tile, index){
  pal <- rainbow(n = length(tile))
  plot(rast)
  plot(ext(tile[[index]]), add = TRUE, border = pal[index], main = paste0(index))
}

animate_plot_rast <- function(rast, tiles, gifname){
  tile_iterator <- seq_len(length(tiles))
  filenames <- glue::glue("tile-example-{tile_iterator}.png")
  for (i in tile_iterator){
    png(filenames[i])
    plot_rast_tile_index(rast, tiles, i)
    dev.off()
  }
  
  png_files <- filenames
  gif_file_name <- glue::glue("{gifname}.gif")
  gif_file <- gifski::gifski(png_files = png_files, gif_file = gif_file_name)
  unlink(png_files)
  
}

# not run with reprex as this writes to file!

# animate_plot_rast(r_81, r_81_tiles_4x4, "81-4x4")
# animate_plot_rast(r_81, r_81_tile_2x2, "81-2x2")
# animate_plot_rast(r_81, r_81_tile_2x2, "81-3x3")
# animate_plot_rast(r_16, r_16_tiles_2x2, "16-2x2")
# animate_plot_rast(r_16, r_16_tiles_3x3, "16-3x3")

Finally there’s a strange case with evel.tif where we get some overlap:

f <- system.file("ex/elev.tif", package="terra")
r <- terra::rast(f)
plot(r)
fileBlocksize(r)
#>      rows cols
#> [1,]   43   95
dim(r)
#> [1] 90 95  1
r_tiles_2x2 <- create_tile_exts(r, ncol = 2, nrow = 2)
plot_rast_tiles(r, r_tiles_2x2)

Created on 2024-07-18 with reprex v2.1.0

Session info

sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.4.0 (2024-04-24)
#>  os       macOS Sonoma 14.5
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Australia/Hobart
#>  date     2024-07-18
#>  pandoc   3.2.1 @ /opt/homebrew/bin/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version    date (UTC) lib source
#>  cli           3.6.3      2024-06-21 [1] CRAN (R 4.4.0)
#>  codetools     0.2-20     2024-03-31 [2] CRAN (R 4.4.0)
#>  curl          5.2.1      2024-03-01 [1] CRAN (R 4.4.0)
#>  digest        0.6.36     2024-06-23 [1] CRAN (R 4.4.0)
#>  evaluate      0.24.0     2024-06-10 [1] CRAN (R 4.4.0)
#>  fastmap       1.2.0      2024-05-15 [1] CRAN (R 4.4.0)
#>  fs            1.6.4.9000 2024-06-26 [1] Github (r-lib/fs@714990b)
#>  glue          1.7.0      2024-01-09 [1] CRAN (R 4.4.0)
#>  highr         0.11       2024-05-26 [1] CRAN (R 4.4.0)
#>  htmltools     0.5.8.1    2024-04-04 [1] CRAN (R 4.4.0)
#>  knitr         1.48       2024-07-07 [1] CRAN (R 4.4.0)
#>  lifecycle     1.0.4      2023-11-07 [1] CRAN (R 4.4.0)
#>  magrittr      2.0.3      2022-03-30 [1] CRAN (R 4.4.0)
#>  purrr         1.0.2      2023-08-10 [1] CRAN (R 4.4.0)
#>  R.cache       0.16.0     2022-07-21 [1] CRAN (R 4.4.0)
#>  R.methodsS3   1.8.2      2022-06-13 [1] CRAN (R 4.4.0)
#>  R.oo          1.26.0     2024-01-24 [1] CRAN (R 4.4.0)
#>  R.utils       2.12.3     2023-11-18 [1] CRAN (R 4.4.0)
#>  Rcpp          1.0.12     2024-01-09 [1] CRAN (R 4.4.0)
#>  reprex        2.1.0      2024-01-11 [1] CRAN (R 4.4.0)
#>  rlang         1.1.4      2024-06-04 [1] CRAN (R 4.4.0)
#>  rmarkdown     2.27       2024-05-17 [1] CRAN (R 4.4.0)
#>  rstudioapi    0.16.0     2024-03-24 [1] CRAN (R 4.4.0)
#>  sessioninfo   1.2.2      2021-12-06 [1] CRAN (R 4.4.0)
#>  styler        1.10.3     2024-04-07 [1] CRAN (R 4.4.0)
#>  terra       * 1.7-78     2024-05-22 [1] CRAN (R 4.4.0)
#>  vctrs         0.6.5      2023-12-01 [1] CRAN (R 4.4.0)
#>  withr         3.0.0      2024-01-16 [1] CRAN (R 4.4.0)
#>  xfun          0.45       2024-06-16 [1] CRAN (R 4.4.0)
#>  xml2          1.3.6      2023-12-04 [1] CRAN (R 4.4.0)
#>  yaml          2.3.9      2024-07-05 [1] CRAN (R 4.4.0)
#> 
#>  [1] /Users/nick/Library/R/arm64/4.4/library
#>  [2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────
@mdsumner
Copy link

Here's a working "st_tile2" just because I got it right finally ... will have a deeper reconcile later on. st_tile should add these values when you provide it with a stars object or with a file name or with an optional bbox arg IMO

  ## a version of st_tile that takes dimension = c(ncol, nrow), extent = c(xmin, xmax, ymin, ymax)
st_tile2 <- function(dimension, extent = NULL, blocksize, overlap = 0) {
  if (is.null(extent)) extent <- c(0, dimension[1L], 0, dimension[2L])
  tiles <- stars::st_tile(dimension[1], dimension[2], 
                          x_window = blocksize[1], 
                          y_window = blocksize[2], overlap = overlap)
  resx <- diff(extent[1:2]) / dimension[1L]
  resy <- diff(extent[3:4]) / dimension[2L]

  xmin <- extent[1L] + (tiles[,"nXOff", drop = TRUE] - 1)  * resx
  xmax <- extent[1L] +  ((tiles[,"nXOff", drop = TRUE]-1) + tiles[, "nXSize", drop = TRUE]) * resx
  ## note minus y
  ymax <- extent[4L] + (tiles[,"nYOff", drop = TRUE] - 1)  * -resy
  ymin <- extent[4L]  + (tiles[, "nYOff", drop = TRUE] -1 + tiles[,"nYSize"])  * -resy
  
  cbind(tiles, xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax)
}
#head(b)
b <- st_tile2(c(360, 180), c(-180, 180, -90, 90), c(90, 90))
plot(range(b[,c("xmin", "xmax")]), range(b[,c('ymin', 'ymax')]))
rect(b[,"xmin"], b[,"ymin"], b[,"xmax"], b[,"ymax"])

## the example from stars
tif <- system.file("tif/L7_ETMs.tif", package = "stars")
library(stars)
#> Loading required package: abind
#> Loading required package: sf
#> Linking to GEOS 3.12.1, GDAL 3.9.0, PROJ 9.3.1; sf_use_s2() is TRUE
r <- read_stars(tif, proxy = TRUE)

idx <- st_tile2(dim(r), st_bbox(r)[c(1, 3, 2, 4)], c(256, 256))

library(terra)
#> terra 1.7.78
plot(rast(tif)[[1]])
par(xpd = NA)
rect(idx[,"xmin"], idx[, "ymin"], idx[, "xmax"], idx[,"ymax"], lwd = 5, border = "firebrick")

## what about with overlap

idxo <- st_tile2(dim(r), st_bbox(r)[c(1, 3, 2, 4)], c(256, 256), overlap = 5)

plot(range(idxo[, c("xmin", "xmax")]), range(idxo[, c("ymin", "ymax")]))
plot(rast(tif)[[1]], add = TRUE)
par(xpd = NA)
rect(idxo[,"xmin"], idxo[, "ymin"], idxo[, "xmax"], idxo[,"ymax"], lwd = 3, border = "firebrick")

Created on 2024-07-17 with reprex v2.0.2

@mdsumner
Copy link

mdsumner commented Jul 17, 2024

I think terra might be doing ok here, the weird thing is the idea of giving a raster that represents the tiling - when, it can't subdivide 4x4 into parts that have a 3x3 structure. In that first example, the best you can do is 2x2 within that set of 4x4 and it centres one and then has clipped tiles around it. I still think that's worth exploring, but I think terra has its own logic here when you give y = SpatRast. What you really should do, same as with st_tile is set the dimension of each tile. So, using a helper from vaster to plot:

plot(r_16); vaster::plot_extent(e <- getTileExtents(r_16, c(2, 2)), add = TRUE); e
     xmin xmax ymin ymax
[1,]    0    2    2    4
[2,]    2    4    2    4
[3,]    0    2    0    2
[4,]    2    4    0    2

that's unproblematic, there's no dangle.

image

But now with 3x3,

plot(r_16); vaster::plot_extent(e <- getTileExtents(r_16, c(3, 3)), add = TRUE); e
     xmin xmax ymin ymax
[1,]    0    3    1    4
[2,]    3    4    1    4
[3,]    0    3    0    1
[4,]    3    4    0    1

image

we start to get dangle, there's one whole 3x3 (aligned to top left), and then there's a 3x1 and 1x3 and a 1x1 - these are remnants of an overall tiling that is broader than the domain - how should that broader context get clipped? I think terra puts that responsibility back on the input when not using an i x j tile dimension.

Note that in terms of the broader domain, the arithmetic "grid logic" for tiling and rasters generally, does extend beyond a given extent (that's why it's stored as an affine transform "offset,scale" in that abstract way, because the math is most general that way and it's the job of software to clip back to a defined extent - this was discussed here in gdalraster: USDAForestService/gdalraster#339 (comment) and I only just realized the connection).

If you gave terra a raster that actually had the properties of that tiling, it would give out the right extents - it would clip the lower/right 3 tiles appropriately to the extent of the x raster, but you had to do the external alignment yourself. With just a number, it chooses top-left alignment and expected dangle, same as stars does, and same as GDAL would.

BTW, using a tile size (e.g. 256x256) is probably the right model, rather than "break this into n pieces", because what you want is a rough size of task for each worker, how much memory does a worker have and what's an efficient way to spread jobs then. Also relative to number of bands and the actual data type ( and, a bit of point for discussion!).

The logic behind the more centred way, where tiles get clipped in different ways makes sense from the point of view of making the most of "internal data" that is buffered away from edge effects. (That's my theory). Where there's a weird overlap - the last example in Nick's code with elev.tif - that's a different problem - I expect that's just a rounding case that terra is missing - I'll keep exploring!

@mdsumner
Copy link

The 'r_81_tiles_4x4' case is weird, and does fit any ideas I have.

But, the theory about the rounding checks out. Here we overlap a pixel column when we have odd number ncol for elev.tif, then we aggregate so that cols is even but rows is odd we lose a row:

  library(terra)
#> terra 1.7.78
f <- system.file("ex/elev.tif", package="terra")
r95x90 <- terra::rast(f)
plot(r95x90)
r2x2 <- rast(r95x90)
dim(r2x2) <- c(2, 2)
plot(r95x90); vaster::plot_extent(e <- getTileExtents(r95x90, r2x2), add = TRUE, col = rainbow(4, alpha = .5)); e

#>          xmin     xmax     ymin     ymax
#> [1,] 5.741667 6.141667 49.81667 50.19167
#> [2,] 6.133333 6.533333 49.81667 50.19167
#> [3,] 5.741667 6.141667 49.44167 49.81667
#> [4,] 6.133333 6.533333 49.44167 49.81667
r24x23 <- aggregate(r95x90, 4)
r2x2 <- rast(r24x23)
dim(r2x2) <- c(2, 2)
plot(r24x23); vaster::plot_extent(e <- getTileExtents(r24x23, r2x2), add = TRUE, col = rainbow(4, alpha = .5)); e

#>          xmin     xmax   ymin     ymax
#> [1,] 5.741667 6.141667 49.825 50.19167
#> [2,] 6.141667 6.541667 49.825 50.19167
#> [3,] 5.741667 6.141667 49.425 49.79167
#> [4,] 6.141667 6.541667 49.425 49.79167

Created on 2024-07-18 with reprex v2.0.2

@mdsumner
Copy link

mdsumner commented Jul 18, 2024

Reported here: rspatial/terra#1564 I'll see how this plays out then pursue the r_81_tiles_4x4 case. (maybe that's a rounding problem too, that just gives different intervals rather than missing/duplicate lines. )

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment