Skip to content

Instantly share code, notes, and snippets.

@mdsumner
Created August 3, 2025 02:45
Show Gist options
  • Save mdsumner/2a7817917d7dbefaa2168cfda5ee1760 to your computer and use it in GitHub Desktop.
Save mdsumner/2a7817917d7dbefaa2168cfda5ee1760 to your computer and use it in GitHub Desktop.

do you need 500 slightly overlapping Zarr datasets?

# Source GeoTIFF. It is downloaded once (below) and then read from the local
# copy. Alternative remote path, not used by this script:
#   "/vsicurl/https://projects.pawsey.org.au/idea-gebco-tif/GEBCO_2024.tif"
src <- "https://projects.pawsey.org.au/idea-gebco-tif/GEBCO_2024.tif"
library(purrr) ## purrr CRAN
library(mirai) ## mirai CRAN
# Only fetch the file when there is no local copy yet.
if (!file.exists(basename(src))) {
  curl::curl_download(src, basename(src))  ## curl CRAN
}
info <- vapour::vapour_raster_info(basename(src))  ## vapour CRAN
# Tiling scheme built from the raster's dimension and extent, with six times
# the file's block size passed as the tile size.
g <- grout::grout(info$dimension, info$extent, info$block * 6) ## devtools::install_github("hypertidy/grout")
idx <- grout::tile_index(g)

# The workers skip tiles that already exist, so the script can be re-run;
# don't warn when the output directory is already there.
dir.create("zarr", showWarnings = FALSE)
# Output path for each tile, labelled by tile number, column and row.
idx$lab <- sprintf("zarr/tile_%i_%i_%i.zarr", idx$tile, idx$tile_col, idx$tile_row)

# Reset any daemons left over from a previous run, then start 31 workers.
mirai::daemons(0)
mirai::daemons(31)

# Clip a single tile out of the local GEBCO GeoTIFF into its own Zarr store.
#
# .x is one row of the tile index: a data frame with columns xmin, xmax, ymin,
# ymax and lab (the output path). The tile is padded by `b` coordinate units on
# every side so that neighbouring tiles overlap slightly. Everything the
# function needs is defined inside it because in_parallel() ships it to the
# daemons. Always returns NULL; the result is the dataset written to disk.
fun <- in_parallel(function(.x) {
  b <- 1.0
  src <- "GEBCO_2024.tif"

  # Skip tiles that already exist so an interrupted run can be resumed.
  if (file.exists(.x$lab)) return(NULL)

  cmd <- "gdal raster clip --input %s --bbox %s --output-format ZARR --output %s --allow-bbox-outside-source"
  e <- unlist(.x[, c("xmin", "xmax", "ymin", "ymax")])
  # Reorder to xmin, ymin, xmax, ymax (the order --bbox expects) and grow the
  # box outward: both minimums move down by b, both maximums move up by b.
  bbox <- e[c(1, 3, 2, 4)] + b * c(-1, -1, 1, 1)
  system(sprintf(cmd, src, paste0(bbox, collapse = ","), .x$lab))
  NULL
})

# One single-row data frame per tile; seq_len() stays correct for zero rows.
walk(split(idx, seq_len(nrow(idx))), fun)
@mdsumner
Copy link
Author

mdsumner commented Aug 4, 2025

Here's a version that reads GEBCO from a server that can handle intense request loads (only a tiny bit slower than local!)

# Remote GEBCO GeoTIFF, read through GDAL's /vsicurl/ handler.
dsn <- "/vsicurl/https://data.source.coop/alexgleith/gebco-2024/GEBCO_2024.tif"
library(purrr) ## purrr CRAN
library(mirai) ## mirai CRAN
# Open the dataset just long enough to read its grid description.
ds <- new(gdalraster::GDALRaster, dsn)
dm <- ds$dim()[1:2]
# Reorder the bbox from xmin, ymin, xmax, ymax to xmin, xmax, ymin, ymax.
ex <- ds$bbox()[c(1, 3, 2, 4)]
block <-  ds$getBlockSize(1L)
# Nothing below uses the handle again, so release it rather than leaving the
# dataset open for the rest of the session.
ds$close()

# Tiling scheme with six times the file's block size passed as the tile size.
g <- grout::grout(dm, ex, block * 6) ## devtools::install_github("hypertidy/grout")
idx <- grout::tile_index(g)

# The workers skip tiles that already exist, so the script can be re-run;
# don't warn when the output directory is already there.
dir.create("zarr", showWarnings = FALSE)
# Output path for each tile, labelled by tile number, column and row.
idx$lab <- sprintf("zarr/tile_%i_%i_%i.zarr", idx$tile, idx$tile_col, idx$tile_row)

# Reset any daemons left over from a previous run, then start 32 workers.
mirai::daemons(0)
mirai::daemons(32)

# Write a single tile of the remote GEBCO GeoTIFF to its own Zarr v3 store.
#
# .x is one row of the tile index: a data frame with columns xmin, xmax, ymin,
# ymax and lab (the output path). The tile is padded by `b` coordinate units on
# every side so neighbouring tiles overlap slightly, then clamped to the
# longitude/latitude limits. Everything the function needs is defined inside it
# because in_parallel() ships it to the daemons. Always returns NULL; the
# result is the dataset written to disk.
translate <- in_parallel(function(.x) {
  b <- 1.0
  src <- "/vsicurl/https://data.source.coop/alexgleith/gebco-2024/GEBCO_2024.tif"

  # Skip tiles that already exist so an interrupted run can be resumed.
  if (file.exists(.x$lab)) return(NULL)

  e <- unlist(.x[, c("xmin", "xmax", "ymin", "ymax")])
  # Reorder to xmin, ymin, xmax, ymax and grow the box outward by b.
  bbox <- e[c(1, 3, 2, 4)] + b * c(-1, -1, 1, 1)
  # Keep the padded box inside [-180, 180] x [-90, 90].
  bbox[1] <- max(bbox[1], -180)
  bbox[2] <- max(bbox[2], -90)
  bbox[3] <- min(bbox[3], 180)
  bbox[4] <- min(bbox[4], 90)

  # Subset via a vrt:// connection string on the remote source. projwin takes
  # upper-left x, upper-left y, lower-right x, lower-right y, hence the
  # xmin, ymax, xmax, ymin ordering.
  dsn <- sprintf("vrt://%s?projwin=%f,%f,%f,%f", src, bbox[1], bbox[4], bbox[3], bbox[2])

  gdalraster::translate(dsn, .x$lab, cl_arg = c("-of", "ZARR", "-co", "COMPRESS=ZSTD", "-co", "FORMAT=ZARR_V3"), quiet = TRUE)
  NULL
})
# One single-row data frame per tile; seq_len() stays correct for zero rows.
system.time(walk(split(idx, seq_len(nrow(idx))), translate))
## 25seconds

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment