Created
October 31, 2022 18:26
-
-
Save martindurant/d8fde3992326bde766af695b9fd60c18 to your computer and use it in GitHub Desktop.
Single file kerchunking
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tarfile

import fsspec
import kerchunk.combine
import kerchunk.hdf
import ujson
import xarray as xr
# Storage options grouped by protocol name. The "s3" entry is what gets passed
# whenever the S3-compatible object store is accessed: "anon" asks for
# unauthenticated access and "client_kwargs" selects the service endpoint.
ro = {
    "s3": {
        "anon": True,
        "client_kwargs": {"endpoint_url": "https://object-store.cloud.muni.cz"},
    },
}
# Get offsets: map the name of each regular file stored in the remote tar
# archive to the byte position at which its data starts inside the archive.
with fsspec.open("s3://testfred/gridS.tar", **ro["s3"]) as tf:
    # Use the TarFile as a context manager so it is closed once the index has
    # been read. Directories and other non-file members are skipped: they hold
    # no data that a byte-range reference could point at.
    with tarfile.TarFile(fileobj=tf) as tar:
        offsets = {
            ti.name: ti.offset_data for ti in tar.getmembers() if ti.isfile()
        }
# Open every "*.nc" member of the tar file under ~/Downloads (presumably a
# local copy of the archive that is also on the object store -- confirm) and
# build one kerchunk reference set per member, keyed by the member's path.
ofs = fsspec.open_files(
    # should have "first" caching strategy?
    "tar://*.nc::~/Downloads/gridS.tar",
    **ro,
)
outs = {}
for member in ofs:
    # The translation reads from the open file, so it must happen while the
    # member is still open.
    with member as stream:
        outs[member.path] = kerchunk.hdf.SingleHdf5ToZarr(stream).translate()
# Re-base each per-file reference set so that it addresses the tar archive on
# the object store directly: every list-valued reference gets the archive URL
# as its target and has the member's data offset added to its byte position.
tar_url = "s3://testfred/gridS.tar"
mods = {}
for key, offset in offsets.items():
    single = outs.get(key)
    if single is None:
        # Archive member that was never scanned (e.g. a directory entry or a
        # file not matching "*.nc"); indexing outs[key] would raise KeyError.
        continue
    fs = fsspec.filesystem(
        "reference",
        fo=single,
        template_overrides={"u": f"tar://{key}"},
        remote_options={
            "fo": tar_url,
            "target_options": ro["s3"],
        },
    )
    # Build fresh entries rather than editing the lists in place, so the
    # filesystem's own reference table is left untouched. Values that are not
    # lists are carried over unchanged.
    mods[key] = {
        k: [tar_url, v[1] + offset, *v[2:]] if isinstance(v, list) else v
        for k, v in fs.references.items()
    }
# Merge the re-based per-file reference sets into a single reference set,
# concatenating along "time_counter"; "nav_lon" and "nav_lat" are declared
# identical across the inputs.
reference_sets = [*mods.values()]
mzz = kerchunk.combine.MultiZarrToZarr(
    reference_sets,
    concat_dims=["time_counter"],
    identical_dims=["nav_lon", "nav_lat"],
    remote_protocol="s3",
    remote_options=ro["s3"],
)
out = mzz.translate()
# Open the combined reference set as a dataset through the "reference://"
# filesystem, passing the S3 options for the referenced data and without
# consolidated metadata.
reference_options = {"fo": out, "remote_options": ro["s3"]}
ds = xr.open_zarr(
    "reference://",
    consolidated=False,
    storage_options=reference_options,
)
# Write the combined reference set to gridS.json. ujson is imported at the top
# of the file, so a missing module fails before the output file is opened for
# writing instead of leaving an empty file behind.
with open("gridS.json", "w", encoding="utf-8") as j:
    ujson.dump(out, j)
# Average "sivolu" over "time_counter", then over all remaining dimensions,
# and evaluate the result. The value is not stored or printed here; the number
# below is presumably what the author saw in an interactive session.
mean_sivolu = ds["sivolu"].mean(dim="time_counter").mean()
mean_sivolu.compute()
# 0.9407204
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment