Skip to content

Instantly share code, notes, and snippets.

@nicain
Created September 29, 2017 19:21
Show Gist options
  • Save nicain/089ecc326d7970f5134713c4a12949ff to your computer and use it in GitHub Desktop.
Save nicain/089ecc326d7970f5134713c4a12949ff to your computer and use it in GitHub Desktop.
serialize xarray to h5
import numpy as np
import h5py
import xarray as xr
import os
import types
import json
# https://github.com/pydata/xarray/issues/1599
# https://github.com/nicain/xarray/tree/feature/1599-to_dict_numpy
# https://codereview.stackexchange.com/questions/120802/recursively-save-python-dictionaries-to-hdf5-files-using-h5py
# Example input: a 10x10 DataArray of zeros with dims ('x', 'y'); each dim is
# labelled with the integer coordinates 0..9.
ds1 = xr.DataArray(np.zeros((10,10)), dims=('x', 'y'), coords={'x':np.arange(10), 'y':np.arange(10)})
# Dictionary form of the DataArray; this is what gets written to HDF5 below.
# NOTE(review): `tolist=False` looks like the keyword proposed in the xarray
# issue/fork linked above (presumably keeps values as numpy objects instead of
# converting them to Python lists) -- confirm the installed xarray accepts it.
DD = ds1.to_dict(tolist=False)
def __save_dict_to_hdf5__(dic, filename):
    """
    Save a nested dictionary to a new HDF5 file.

    The dictionary is expected to contain only strings, np.float64, np.int64,
    np.ndarray, and other dictionaries following this structure. These are
    the sorts of dictionaries that are meant to be produced by the
    ReportInterface__to_dict__() method.

    Parameters
    ----------
    dic : dict
        Dictionary to write; nested dictionaries become HDF5 groups.
    filename : str
        Path of the file to create. It must not exist yet.

    Raises
    ------
    AssertionError
        If `filename` already exists (this is a no-clobber operation).
    """
    # Raised explicitly instead of via `assert` so the guard still runs under
    # `python -O`; with the assert stripped, an existing file would have been
    # truncated. AssertionError and its message are kept for existing callers.
    if os.path.exists(filename):
        raise AssertionError('this is a noclobber operation bud')
    # 'x' is h5py's exclusive-create mode: the open itself fails if the file
    # appeared between the check above and this call, instead of truncating it.
    with h5py.File(filename, 'x') as h5file:
        __recursively_save_dict_contents_to_group__(h5file, '/', dic)
def _stored_matches_original(stored, original):
    """Return True when a value read back from HDF5 equals the value written."""
    if isinstance(original, str):
        # Newer h5py returns variable-length strings as bytes; decode so the
        # comparison is str against str.
        if isinstance(stored, bytes):
            stored = stored.decode('utf-8')
        return stored == original
    stored = np.asarray(stored)
    original = np.asarray(original)
    if stored.shape != original.shape:
        return False
    same = stored == original
    if (np.issubdtype(stored.dtype, np.inexact)
            and np.issubdtype(original.dtype, np.inexact)):
        # NaN != NaN, so positions that are NaN on both sides count as equal;
        # otherwise any data with missing values would fail verification.
        same = same | (np.isnan(stored) & np.isnan(original))
    return bool(np.all(same))


def __recursively_save_dict_contents_to_group__(h5file, path, dic):
    """
    Take an already open HDF5 file and insert the contents of a dictionary
    at the current path location. Calls itself recursively for nested
    dictionaries, which end up as nested HDF5 groups.

    Parameters
    ----------
    h5file : h5py.File
        Open, writable HDF5 file.
    path : str
        Group path to write under; each key is appended directly to it, so
        it should end with '/'.
    dic : dict
        Dictionary with string keys. Values of type np.int64, np.float64,
        str and np.ndarray are written as datasets and dict values are
        recursed into. Values of any other type are skipped without error.

    Raises
    ------
    AssertionError
        If an argument or key has the wrong type, or a value read back from
        the file does not match what was written.
    """
    assert isinstance(dic, dict), "must provide a dictionary"
    assert isinstance(path, str), "path must be a string"
    assert isinstance(h5file, h5py.File), "must be an open h5py file"
    for key, value in dic.items():
        assert isinstance(key, str), 'dict keys must be strings to save to hdf5'
        target = path + key
        if isinstance(value, (np.int64, np.float64, str, np.ndarray)):
            h5file[target] = value
            # `dset[()]` reads the whole dataset; it replaces the `.value`
            # attribute, which no longer exists in h5py 3.
            assert _stored_matches_original(h5file[target][()], value), (
                'The data representation in the HDF5 file does not match the original dict.')
        elif isinstance(value, dict):
            __recursively_save_dict_contents_to_group__(h5file, target + '/', value)
        # Any other value type (e.g. None, tuples) is intentionally left out
        # of the file rather than treated as an error.
def __load_dict_from_hdf5__(filename):
    """
    Read an HDF5 file back into a nested dictionary.

    The file is opened read-only and its root group is loaded recursively:
    datasets become values and groups become nested dictionaries. The
    resulting dictionaries (strings, floats, ints, numpy arrays and further
    dictionaries) can then be used to reconstruct ReportInterface subclass
    instances using the ReportInterface.__from_dict__() method.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file to read.

    Returns
    -------
    dict
        Contents of the file's root group.
    """
    with h5py.File(filename, 'r') as source:
        contents = __recursively_load_dict_contents_from_group__(source, '/')
    return contents
def __recursively_load_dict_contents_from_group__(h5file, path):
    """
    Load contents of an HDF5 group. If further groups are encountered,
    treat them like dicts and continue to load them recursively.

    Parameters
    ----------
    h5file : h5py.File
        Open HDF5 file to read from.
    path : str
        Path of the group to load; child names are appended directly to it,
        so it should end with '/'.

    Returns
    -------
    dict
        Maps each child name to the dataset's value, or to a nested dict
        for a child group. Children that are neither are left out.
    """
    ans = {}
    for key, item in h5file[path].items():
        if isinstance(item, h5py.Dataset):
            # `item[()]` reads the whole dataset; it replaces the `.value`
            # attribute, which no longer exists in h5py 3.
            value = item[()]
            if isinstance(value, bytes):
                # Newer h5py hands back variable-length UTF-8 strings as
                # bytes; decode them so a saved str is loaded as a str again.
                info = h5py.check_string_dtype(item.dtype)
                if (info is not None and info.length is None
                        and info.encoding == 'utf-8'):
                    value = value.decode('utf-8')
            ans[key] = value
        elif isinstance(item, h5py.Group):
            ans[key] = __recursively_load_dict_contents_from_group__(h5file, path + key + '/')
    return ans
# Runs at module level: writes the example dictionary DD to 'example.h5' in the
# current working directory. The save is no-clobber, so this line fails if
# 'example.h5' already exists (for example when the script is run a second time).
__save_dict_to_hdf5__(DD, 'example.h5')
@nicain
Copy link
Author

nicain commented Sep 29, 2017

Tested on python3.5

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment