Skip to content

Instantly share code, notes, and snippets.

@kglspl
Created January 8, 2024 17:54
Show Gist options
  • Save kglspl/c8ede1699e078f3ca04e42c2862a676f to your computer and use it in GitHub Desktop.
Save kglspl/c8ede1699e078f3ca04e42c2862a676f to your computer and use it in GitHub Desktop.
Easier interface to h5py datasets
import h5py
# Copyright (c) 2023 kglspl
# MIT License (the same as: https://github.com/kglspl/ppmparser/blob/master/LICENSE)
class H5FS:
    """Convenience wrapper around an ``h5py.File`` that tracks one dataset.

    The underlying file object is available as ``.f`` and the currently
    selected dataset (``None`` until one is opened or required) as ``.dset``.
    Instances are context managers: leaving the ``with`` block closes the
    file.
    """

    def __init__(self, filename, mode):
        """Open *filename* with ``h5py.File`` using the given *mode*."""
        self.filename = filename
        self.f = h5py.File(filename, mode)
        self.dset = None

    def open(self, name=None):
        """Select an existing dataset and store it in ``self.dset``.

        Args:
            name: Path of the dataset inside the file, with or without a
                leading ``/``.  When ``None``, the first dataset found
                while walking the file is used.

        Returns:
            ``self``, so the call can be chained into a ``with`` statement.

        Raises:
            LookupError: If the file has no datasets (``name`` is ``None``)
                or no dataset with the requested name exists.
        """
        info = self._h5_get_dataset_info(name)
        if info is None:
            if name is None:
                raise LookupError(f"No datasets found in {self.filename}")
            raise LookupError(f"Dataset {name} not found in {self.filename}")
        # The dataset is known to exist, so a plain lookup is sufficient.
        self.dset = self.f[info["name"]]
        return self

    def require_dataset(self, name, shape, dtype, **kwargs):
        """Call ``require_dataset`` on the file and remember the result.

        All arguments are forwarded unchanged to ``self.f.require_dataset``.
        The resulting dataset is stored in ``self.dset`` and also returned.
        """
        dset = self.f.require_dataset(name, shape, dtype, **kwargs)
        self.dset = dset
        return dset

    @staticmethod
    def _absolute_name(name):
        """Return *name* as an absolute HDF5 path (single leading ``/``)."""
        return "/" + name.strip("/")

    # from: https://stackoverflow.com/a/53340677
    def _h5_get_dataset_info(self, requested_name=None, obj=None):
        """Walk the file depth-first and describe the first matching dataset.

        Args:
            requested_name: Dataset path to look for; ``None`` matches any
                dataset.
            obj: Object to start from; defaults to the root group.

        Returns:
            A dict with ``name``, ``shape``, ``dtype`` and ``chunks`` of the
            first match, or ``None`` when nothing matches.
        """
        if obj is None:
            obj = self.f["/"]
        if isinstance(obj, h5py.Group):  # h5py.File is a Group subclass
            for key in obj:
                # Pass the requested name down; otherwise nested calls would
                # accept whichever dataset they encounter first.
                found = self._h5_get_dataset_info(requested_name, obj[key])
                if found is not None:
                    return found
        elif isinstance(obj, h5py.Dataset):
            # obj.name is an absolute path, so compare against the
            # normalised form of the requested name.
            if (
                requested_name is None
                or obj.name == self._absolute_name(requested_name)
            ):
                return {
                    "name": obj.name,
                    "shape": obj.shape,
                    "dtype": obj.dtype,
                    "chunks": obj.chunks,
                }
        return None

    def close(self):
        """Close the underlying h5py file."""
        self.f.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
@kglspl
Copy link
Author

kglspl commented Jan 8, 2024

This H5FS class makes opening the .h5 file much more convenient (imho). After opening the file, it exposes .dset (normal h5py dataset object) for further manipulation.

Main use cases:

  • reading an existing file with only a single dataset:
    with H5FS('/path/to/file.h5', 'r').open() as scroll:
        # example usage - read from dataset into a numpy array, as you would with h5py directly:
        a = scroll.dset[4000:4500, 4000:4500, 7000:7100]
  • reading an existing file, but we have a specific dataset from the file in mind:
    with H5FS('/path/to/file.h5', 'r').open('mydataset') as scroll:
        pass
  • creating a file or writing to an existing one if it exists:
    with H5FS('/path/to/file.h5', 'a') as scroll:
        dset = scroll.require_dataset('mydataset', shape=(10000, 20000, 50000), dtype=np.uint16, chunks=(250, 250, 250))
        # now we can also write to dataset:
        dset[4000:4500, 4000:4500, 7000:7100] = a

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment