Created
September 29, 2017 19:21
-
-
Save nicain/089ecc326d7970f5134713c4a12949ff to your computer and use it in GitHub Desktop.
serialize xarray to h5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import h5py | |
| import xarray as xr | |
| import os | |
| import types | |
| import json | |
| # https://github.com/pydata/xarray/issues/1599 | |
| # https://github.com/nicain/xarray/tree/feature/1599-to_dict_numpy | |
| # https://codereview.stackexchange.com/questions/120802/recursively-save-python-dictionaries-to-hdf5-files-using-h5py | |
# Example payload: a 10x10 array of zeros with integer coordinates on both axes.
ds1 = xr.DataArray(
    np.zeros((10, 10)),
    dims=('x', 'y'),
    coords={'x': np.arange(10), 'y': np.arange(10)},
)

# NOTE(review): ``tolist=False`` appears to come from the xarray fork linked
# above (keeps numpy arrays instead of converting to lists) -- confirm that the
# installed xarray accepts this keyword.
DD = ds1.to_dict(tolist=False)
def __save_dict_to_hdf5__(dic, filename):
    """
    Save a nested dictionary to a new HDF5 file.

    The dictionary is written starting at the root group ('/') by
    __recursively_save_dict_contents_to_group__; see that function for the
    value types that are actually stored.

    Parameters
    ----------
    dic : dict
        Dictionary to serialize (keys must be strings).
    filename : str
        Path of the HDF5 file to create. It must not exist yet.

    Raises
    ------
    AssertionError
        If `filename` already exists (this is a no-clobber operation).
    """
    # Explicit check instead of a bare ``assert``: asserts are stripped under
    # ``python -O``, which would silently let an existing file be truncated.
    # AssertionError is kept so callers catching it keep working.
    if os.path.exists(filename):
        raise AssertionError('this is a noclobber operation bud')

    # 'w-' means "create, fail if exists": it guards against the file
    # appearing between the check above and the open call.
    with h5py.File(filename, 'w-') as h5file:
        __recursively_save_dict_contents_to_group__(h5file, '/', dic)
def __recursively_save_dict_contents_to_group__(h5file, path, dic):
    """
    Write the contents of a dictionary into an open HDF5 file at `path`.

    Values are handled by type:
      * np.int64, np.float64 and str are stored as datasets and read back
        to verify the round trip;
      * np.ndarray is stored as a dataset and verified with np.array_equal;
      * dict triggers a recursive call with `path + key + '/'` as the new path;
      * values of any other type are silently skipped.

    Parameters
    ----------
    h5file : h5py.File
        An already open, writable HDF5 file.
    path : str
        Location inside the file, ending with '/' (e.g. '/' for the root).
    dic : dict
        Dictionary with string keys.

    Raises
    ------
    AssertionError
        If an argument or key has the wrong type, or if a value read back
        from the file does not match what was written.
    """
    assert isinstance(dic, dict), "must provide a dictionary"
    assert isinstance(path, str), "path must be a string"
    assert isinstance(h5file, h5py.File), "must be an open h5py file"

    mismatch_msg = ('The data representation in the HDF5 file does not '
                    'match the original dict.')

    for key, item in dic.items():
        assert isinstance(key, str), 'dict keys must be strings to save to hdf5'
        target = path + key

        if isinstance(item, (np.int64, np.float64, str)):
            h5file[target] = item
            # ``dset[()]`` replaces ``dset.value``, which was removed in h5py 3.0.
            stored = h5file[target][()]
            # h5py >= 3 returns stored strings as bytes; decode before comparing
            # so the check works on both old and new h5py.
            if isinstance(item, str) and isinstance(stored, bytes):
                stored = stored.decode('utf-8')
            assert stored == item, mismatch_msg
        elif isinstance(item, np.ndarray):
            h5file[target] = item
            assert np.array_equal(h5file[target][()], item), mismatch_msg
        elif isinstance(item, dict):
            __recursively_save_dict_contents_to_group__(h5file, target + '/', item)
        # Anything else (e.g. None, tuples, lists) is intentionally left out.
def __load_dict_from_hdf5__(filename):
    """
    Read an HDF5 file back into a nested dictionary.

    The file is opened read-only and its root group ('/') is handed to
    __recursively_load_dict_contents_from_group__, whose result is returned.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file to read.

    Returns
    -------
    dict
        Dictionary mirroring the group/dataset layout of the file.
    """
    with h5py.File(filename, 'r') as h5file:
        loaded = __recursively_load_dict_contents_from_group__(h5file, '/')
    return loaded
def __recursively_load_dict_contents_from_group__(h5file, path):
    """
    Load the contents of the HDF5 group at `path` into a dictionary.

    Datasets become dictionary values; nested groups are loaded recursively
    and become nested dictionaries.

    Parameters
    ----------
    h5file : h5py.File
        An already open HDF5 file.
    path : str
        Location of the group inside the file, ending with '/'.

    Returns
    -------
    dict
        One entry per dataset or sub-group found directly under `path`.
    """
    ans = {}
    for key, item in h5file[path].items():
        if isinstance(item, h5py.Dataset):
            # ``item[()]`` reads the whole dataset; it replaces ``item.value``,
            # which was removed in h5py 3.0.
            # NOTE(review): on h5py >= 3 string datasets come back as bytes,
            # not str -- decode at the call site if str is required.
            ans[key] = item[()]
        elif isinstance(item, h5py.Group):
            ans[key] = __recursively_load_dict_contents_from_group__(
                h5file, path + key + '/')
    return ans
# Write the example dictionary to disk. The save is no-clobber, so this fails
# if 'example.h5' already exists (e.g. on a second run).
__save_dict_to_hdf5__(dic=DD, filename='example.h5')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Tested on Python 3.5.