## All working from here: https://virtualizarr.readthedocs.io/en/stable/usage.html (with the HTTP tab)
## on the docker image
## reticulate::use_python("/workenv/bin/python3.12")
library(bowerbird)

## Define the BRAN2020 THREDDS catalogue as a bowerbird data source.
bran_source <- bb_source(
  name = "BRAN netcdf",
  id = "BRAN-netcdf-0001",
  description = "BRAN",
  doc_url = "https://research.csiro.au/bluelink/global/reanalysis/",
  source_url = "https://thredds.nci.org.au/thredds/catalog/gb6/BRAN/BRAN2020/catalog.html",
  citation = "",
  license = "",
  method = list("bb_handler_thredds", level = 2, accept_download = "\\.nc$"),
  access_function = "",
  data_group = ""
)

## Dry run: crawl the catalogue and resolve the file list without
## downloading anything.
res <- bb_get(bran_source, local_file_root = tempdir(), verbose = TRUE,
              dry_run = TRUE)
## Keep only the salinity files: daily time steps, one file per month
## (~775 monthly files).
salt_pattern <- "daily/ocean_salt_[0-9]{4}_[0-9]{2}"
url <- grep(salt_pattern, res$files[[1]]$url, value = TRUE)
library(purrr)

## Start background daemons, leaving a couple of cores free for the main
## session (the host probably has 32 CPUs).
n_workers <- parallelly::availableCores() - 2
mirai::daemons(n_workers)
## Worker function: open one remote file as a virtual dataset and pickle it,
## so the result can be shipped back across the mirai process boundary as
## plain bytes. The body of in_parallel() must be completely standalone,
## hence the in-function imports (mirai 'everywhere' settings could preload
## these on the daemons instead -- TODO).
fun <- in_parallel(function(.x) {
  vz <- reticulate::import("virtualizarr")
  pk <- reticulate::import("pickle")
  vds <- vz$open_virtual_dataset(.x, indexes = list())
  ## as.raw() materializes the pickled Python bytes as an R raw vector
  as.raw(pk$dumps(vds))
})
## Open the first batch of files in parallel and time it.
## head(url, 50) rather than url[1:50]: identical when >= 50 files matched,
## but avoids NA elements (and downstream worker errors) when fewer matched.
system.time({
  datasets <- map(head(url, 50), fun)
})
## Shut the daemons down again now the parallel phase is finished.
mirai::daemons(0)
xarray <- reticulate::import("xarray")
## BUG FIX: 'pickle' was previously only imported inside the in_parallel()
## worker closures, so it did not exist in this (main) session and
## pickle$loads below would fail with "object 'pickle' not found".
pickle <- reticulate::import("pickle")

## Unpickle each virtual dataset and concatenate along the time axis.
x <- xarray$concat(map(datasets, pickle$loads),
                   dim = "Time", coords = "minimal",
                   compat = "override", combine_attrs = "override")
x