Last active
September 20, 2022 17:14
-
-
Save martindurant/95a5c67a1a1cd9ea6c67bd515e331613 to your computer and use it in GitHub Desktop.
Single file datasets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build one combined kerchunk reference set for the NetCDF4/HDF5 files stored
# inside a tar archive on S3, save it as JSON, and open it with xarray.
#
# The approach: scan each archive member with kerchunk as if it were a
# standalone file, then rewrite every chunk reference so that it points at
# the tar archive itself, shifted by the member's data offset in the archive.
import tarfile

import fsspec
import kerchunk.combine
import kerchunk.hdf
import ujson
import xarray as xr

# Location of the archive; every rewritten reference targets this URL.
TAR_URL = "s3://testfred/ice.tar"

# Storage options keyed by protocol, in the form fsspec expects for chained
# URLs ("tar://...::s3://...").
ro = dict(
    s3={
        "anon": True,
        "client_kwargs": {"endpoint_url": "https://object-store.cloud.muni.cz"},
    }
)

# get offsets
# Map each member name to the byte position of its data inside the archive.
# NOTE(review): the offset arithmetic below assumes the members are stored
# uncompressed in the tar — confirm for other archives.
with fsspec.open(TAR_URL, **ro["s3"]) as tf:
    with tarfile.TarFile(fileobj=tf) as tar:
        offsets = {ti.name: ti.offset_data for ti in tar.getmembers()}

ofs = fsspec.open_files(
    # should have "first" caching strategy?
    f"tar://*.nc::{TAR_URL}", **ro
)

# Per-member reference sets, keyed by the member path inside the archive.
outs = {}
for of in ofs:
    with of as f:
        # ``url`` is passed explicitly: some kerchunk releases require it
        # (omitting it raises "missing 1 required positional argument:
        # 'url'"). Its value is irrelevant here because every reference
        # target is overwritten with TAR_URL below.
        h = kerchunk.hdf.SingleHdf5ToZarr(f, of.path)
        outs[of.path] = h.translate()

# Reference sets retargeted at the tar archive, keyed by member name.
mods = {}
for key, offset in offsets.items():
    # ``offsets`` lists every tar member, while ``outs`` only holds the
    # "*.nc" members that were translated; skip anything else (directories,
    # other files) instead of failing with a KeyError.
    if key not in outs:
        continue
    out = outs[key]
    # Instantiating the reference filesystem expands the reference set
    # (including any URL templates) into plain entries in ``fs.references``.
    fs = fsspec.filesystem(
        "reference",
        fo=out,
        template_overrides={"u": f"tar://{key}"},
        remote_options={
            "fo": TAR_URL,
            "target_options": ro["s3"],
        },
    )
    mod = fs.references.copy()
    for v in mod.values():
        # List entries are [url, offset, size] chunk references; other
        # values are inline data and are left untouched.
        if isinstance(v, list):
            v[0] = TAR_URL
            v[1] += offset  # member-relative -> archive-relative position
    mods[key] = mod

# Combine the per-file reference sets into a single logical dataset.
mzz = kerchunk.combine.MultiZarrToZarr(
    list(mods.values()),
    remote_options=ro["s3"],
    remote_protocol="s3",
    concat_dims=["time_counter"],
    identical_dims=["nav_lon", "nav_lat"],
)
out = mzz.translate()

ds = xr.open_zarr(
    "reference://",
    storage_options={
        "fo": out,
        "remote_options": ro["s3"],
    },
    consolidated=False,
)

# Persist the combined references so the dataset can be reopened later
# without rescanning the archive.
with open("icemod.json", "w") as j:
    ujson.dump(out, j)

# Sanity check: in a notebook/REPL this expression displays the overall mean.
ds.sivolu.mean(dim='time_counter').mean().compute()
# 0.9407204
I updated lines 19 to 26 as follows:
# Open every "*.nc" member of the tar archive and translate each one into a
# kerchunk reference set, keyed by the member's path inside the archive.
ofs = fsspec.open_files(
    "tar://*.nc::s3://testfred/ice.tar", **ro
)
outs = {}
for of in ofs:
    with of as f:
        # The member path is supplied as the ``url`` argument, which avoids
        # the "missing 1 required positional argument: 'url'" TypeError.
        h = kerchunk.hdf.SingleHdf5ToZarr(f, of.path)
        outs[of.path] = h.translate()
and I could open the tar file as dataset, thank you!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@martindurant
Sorry, I couldn't make it work.
I tried the script above and got the following error:
---> 19 h = kerchunk.hdf.SingleHdf5ToZarr(f)
20 outs[of.path] = h.translate()
TypeError: __init__() missing 1 required positional argument: 'url'