Skip to content

Instantly share code, notes, and snippets.

@mdsumner
Last active September 17, 2025 03:52
Show Gist options
  • Select an option

  • Save mdsumner/c8a13dc70411cdb61344b0d2a7ef92a5 to your computer and use it in GitHub Desktop.

Select an option

Save mdsumner/c8a13dc70411cdb61344b0d2a7ef92a5 to your computer and use it in GitHub Desktop.
@mdsumner
Copy link
Author

Python: create the salt URLs

# "YYYY_MM" stamps for every month from January 1993 through December 2023,
# in chronological order.
d = [
    f"{year}_{month:02d}"
    for year in range(1993, 2024)
    for month in range(1, 13)
]

# One THREDDS fileServer URL per monthly ocean_salt file.
fileserver = [
    "https://thredds.nci.org.au/thredds/fileServer/gb6/BRAN/BRAN2020/daily/"
    f"ocean_salt_{stamp}.nc"
    for stamp in d
]

@mdsumner
Copy link
Author

from virtualizarr import open_virtual_dataset, open_virtual_mfdataset
from virtualizarr.parsers import HDFParser
from virtualizarr.registry import ObjectStoreRegistry

from obstore.store import from_url


##https://thredds.nci.org.au/thredds/fileServer/gb6/BRAN/BRAN2020/daily/ocean_salt_1993_01.nc

# Host serving the files; the same string is used as the registry key below.
bucket = "https://thredds.nci.org.au"
# Path of the first file relative to the host (not referenced again below).
path = "thredds/fileServer/gb6/BRAN/BRAN2020/daily/ocean_salt_1993_01.nc"

# The two monthly files that get combined.
url_1 = "https://thredds.nci.org.au/thredds/fileServer/gb6/BRAN/BRAN2020/daily/ocean_salt_1993_01.nc"
url_2 = "https://thredds.nci.org.au/thredds/fileServer/gb6/BRAN/BRAN2020/daily/ocean_salt_1993_02.nc"

parser = HDFParser()
store = from_url(bucket)
registry = ObjectStoreRegistry({bucket: store})

# Open both files as virtual datasets and concatenate them along "Time".
combined_vds = open_virtual_mfdataset(
    [url_1, url_2],
    parser=parser,
    registry=registry,
    combine="nested",
    concat_dim="Time",
    parallel="dask",
    loadable_variables=["Time", "st_ocean", "xt_ocean", "yt_ocean"],
    drop_variables=["Time_bounds", "average_DT", "average_T1", "average_T2", "nv"],
)

@mdsumner
Copy link
Author

mdsumner commented Sep 11, 2025

In situ, getting there.

this worked,

#docker run --rm -ti -v /perm_storage/home/mdsumner/bran:/bran ghcr.io/mdsumner/gdal-builds:dev-python
#. /workenv/bin/activate


import pathlib
import obstore
import virtualizarr

# Single local file, at the path where the data directory is mounted.
file = "/bran/ocean_salt_2023_11.nc"

# Local object store rooted at the current working directory.
store = obstore.store.LocalStore(prefix=pathlib.Path.cwd())
# Register that store under a URL built from the file path.
registry = virtualizarr.registry.ObjectStoreRegistry({f"file:////{file}": store})
parser = virtualizarr.parsers.HDFParser()

# Only one file is opened here, so the multi-file options
# (combine / concat_dim / parallel) are not passed.
vds = virtualizarr.open_virtual_dataset(
    file,
    parser=parser,
    registry=registry,
    loadable_variables=["Time", "st_ocean", "xt_ocean", "yt_ocean"],
    drop_variables=["Time_bounds", "average_DT", "average_T1", "average_T2", "nv"],
)

Not sure about this one yet:

#docker run --rm -ti -v /perm_storage/home/mdsumner/bran:/bran ghcr.io/mdsumner/gdal-builds:dev-python
#. /workenv/bin/activate


import pathlib
import obstore
import virtualizarr

# Directory that holds both input files.
data_dir = "/bran"
file = [f"{data_dir}/ocean_salt_2023_11.nc", f"{data_dir}/ocean_salt_2023_12.nc"]

# Root the store at the data directory rather than at whatever the current
# working directory happens to be.
store = obstore.store.LocalStore(prefix=pathlib.Path(data_dir))

# The registry key is a URL prefix, not a file: one entry covers every file
# underneath it. The previous key interpolated the whole `file` list into the
# f-string, which produced the list's repr inside the URL and so matched
# neither file.
# NOTE(review): assumes the registry resolves each file URL by prefix match
# and that plain local paths are normalised to file:// URLs on open -- confirm
# against the VirtualiZarr documentation.
registry = virtualizarr.registry.ObjectStoreRegistry({f"file://{data_dir}": store})
parser = virtualizarr.parsers.HDFParser()

# Open both files as virtual datasets and concatenate them along "Time".
vds = virtualizarr.open_virtual_mfdataset(
    file,
    registry=registry,
    parser=parser,
    combine="nested",
    concat_dim="Time",
    loadable_variables=["Time", "st_ocean", "xt_ocean", "yt_ocean"],
    parallel="dask",
    drop_variables=["Time_bounds", "average_DT", "average_T1", "average_T2", "nv"],
)

@mdsumner
Copy link
Author

mdsumner commented Sep 11, 2025

now with concurrent.futures

#qsub -I   -P gv90 -l walltime=01:35:00,ncpus=24,mem=124GB,jobfs=12GB,storage=gdata/gb6+scratch/gv90

#module load singularity
#singularity shell /scratch/gv90/mds581/software/sif_lib/gdal-builds_dev-python.sif

# "YYYY_MM" stamps for each month of 1993 through 2023, in chronological order.
d = [
    f"{year}_{month:02d}"
    for year in range(1993, 2024)
    for month in range(1, 13)
]

# NOTE: the path/URL setup below is awkward; revisit after reviewing obstore.
# Directory holding the monthly ocean_salt files on the local filesystem.
store_path = "/g/data/gb6/BRAN/BRAN2020/daily"

# file:// URL for each monthly file under store_path.
ff = ["file://" + store_path + "/ocean_salt_" + stamp + ".nc" for stamp in d]

import xarray as xr
from obstore.store import LocalStore
from concurrent.futures import ThreadPoolExecutor

from virtualizarr import open_virtual_dataset, open_virtual_mfdataset
from virtualizarr.parsers import HDFParser
from virtualizarr.registry import ObjectStoreRegistry
import time


# All monthly file:// URLs built earlier in the script.
file_url = ff

# Local store rooted at the data directory, registered under the bare
# file:/// prefix.
store = LocalStore(prefix=store_path)
registry = ObjectStoreRegistry({"file:///": store})
parser = HDFParser()

# Variables to load versus variables to drop when opening each file.
loadvars = ["Time", "st_ocean", "xt_ocean", "yt_ocean"]
dropvars = ["Time_bounds", "average_DT", "average_T1", "average_T2", "nv", "st_edges_ocean"]

# Time only the open/combine call. perf_counter() is monotonic, so the
# measured interval cannot be skewed by wall-clock adjustments the way a
# time.time() difference can.
s = time.perf_counter()
vds = open_virtual_mfdataset(
    file_url,
    parser=parser,
    registry=registry,
    combine="nested",
    concat_dim="Time",
    parallel=ThreadPoolExecutor,
    drop_variables=dropvars,
    loadable_variables=loadvars,
)
e = time.perf_counter()

# Elapsed seconds for the open/combine step.
print(e - s)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment