Run on 128 CPUs; took 158 seconds.
# Session options, set before furrr is attached: opt in to forked processing
# (needed for the multicore plan) and ignore the future RNG-misuse check.
options(
  parallelly.fork.enable = TRUE,
  future.rng.onMisuse = "ignore"
)
library(furrr)
plan(multicore)

# Read the GHRSST MUR v2 parquet table from data.source.coop; it supplies the
# asset links (and the datetime column used when the result is written).
d <- arrow::read_parquet(
  "https://data.source.coop/ausantarctic/ghrsst-mur-v2/ghrsst-mur-v2.parquet"
)

# Prefix every analysed_sst link with GDAL's /vsicurl/ so each entry can be
# opened remotely as a raster data source.
dsn <- sprintf(
  "/vsicurl/%s",
  d$assets$analysed_sst$href
)
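# Cell number of the point (x = 147, y = -48) in the grid of dsn[1]; this is
# the cell read by readfun(). It was computed once with:
# (cell <- terra::cellFromXY(terra::rast(dsn[1]), cbind(147, -48)))
# 496796700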
library(terra)
# Read the value of one raster cell from a single data source.
#
# Opens `x` with terra and extracts the value at `cell`. Any error raised
# while opening or reading the source is deliberately swallowed, so that one
# bad source yields NA instead of aborting the whole parallel map.
#
# Arguments:
#   x    Data source name passed to terra::rast().
#   cell Cell number to extract. Defaults to 496796700, the cell this script
#        was written for. Pass a single cell when mapping with
#        future_map_dbl(), which expects exactly one value per source.
#
# Returns the first column of the extracted values, or NA_real_ if the
# source could not be read.
readfun <- function(x, cell = 496796700) {
  tryCatch(
    terra::extract(terra::rast(x), cell, raw = TRUE)[, 1],
    # Best-effort read: map every failure to a missing value.
    error = function(e) NA_real_
  )
}
# Apply readfun to every data source through furrr, collecting one double per
# source.
vals <- future_map_dbl(.x = dsn, .f = readfun)

# Pair each extracted value with its datetime and store the series as parquet.
arrow::write_parquet(
  x = tibble::tibble(sst = vals, date = d$datetime),
  sink = "sst.parquet"
)
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 256
CPU Utilized: 00:01:24
CPU Efficiency: 0.19% of 12:01:04 core-walltime
Job Wall-clock time: 00:02:49
Memory Utilized: 18.11 GB
Memory Efficiency: 7.87% of 230.00 GB