Skip to content

Instantly share code, notes, and snippets.

@bhawkins
Created September 9, 2018 05:47
Show Gist options
  • Save bhawkins/6edaab42b9907758eeb58a24399b9426 to your computer and use it in GitHub Desktop.
Save bhawkins/6edaab42b9907758eeb58a24399b9426 to your computer and use it in GitHub Desktop.
#!/usr/bin/env julia
# Demonstrate the following features:
# 1) Complex type compatible with h5py and pytables Python modules.
# 2) Create dataset with this type, chunking, etc.
# 3) Write data in pieces, not all at once.
# 4) Read back data in h5py format as native julia complex type.
#
# Brian Hawkins
# 2018-09-08
# MIT license
# Tested with julia 0.7.0
using HDF5
using Random
"""
    julia_to_h5py_ctype(::Type{Complex{T}}=ComplexF32) where {T}

Build an HDF5 compound datatype laid out like a Julia `Complex{T}`, using the
member names (`"r"` and `"i"`) that this file pairs with the Python h5py module.

The compound type is `2 * sizeof(T)` bytes wide and holds two members of the
HDF5 type that corresponds to `T`: `"r"` at byte offset 0 and `"i"` at byte
offset `sizeof(T)`. Called without an argument, the type for `ComplexF32` is
built.

Returns the new type id wrapped in an `HDF5Datatype`.
"""
function julia_to_h5py_ctype(::Type{Complex{T}}=ComplexF32) where {T}
    width = sizeof(T)
    compound = HDF5.h5t_create(HDF5.H5T_COMPOUND, 2 * width)
    # Both members share the HDF5 type that corresponds to T.
    part = HDF5.hdf5_type_id(T)
    HDF5.h5t_insert(compound, "r", 0, part)
    HDF5.h5t_insert(compound, "i", width, part)
    return HDF5Datatype(compound)
end
"""
    h5py_to_julia_ctype(dtype::HDF5Datatype)::Type

Return the Julia `Complex{T}` type matching an h5py-style complex HDF5 datatype.

`dtype` is asserted to be a compound type with exactly two members, named `"r"`
(index 0) and `"i"` (index 1), whose HDF5 types both map to the same Julia
element type `T`. A failed check raises an `AssertionError`.
"""
function h5py_to_julia_ctype(dtype::HDF5Datatype)::Type
    # Structural checks: a two-member compound named "r" / "i", in that order.
    cls = HDF5.h5t_get_class(dtype)
    @assert cls == HDF5.H5T_COMPOUND
    @assert 2 == HDF5.h5t_get_nmembers(dtype)
    @assert "r" == HDF5.h5t_get_member_name(dtype.id, 0)
    @assert "i" == HDF5.h5t_get_member_name(dtype.id, 1)

    # Wrap the datatype of the member at the given zero-based index.
    member(index) = HDF5Datatype(HDF5.h5t_get_member_type(dtype, index))
    rt = member(0)
    it = member(1)

    # Both parts must translate to the same Julia element type.
    T = HDF5.hdf5_to_julia_eltype(rt)
    @assert T == HDF5.hdf5_to_julia_eltype(it)
    return Complex{T}
end
# Build a small, reproducible complex test array.
Random.seed!(1234)
dims = (10, 2)
z = rand(ComplexF32, dims)

# Remove any output file left over from an earlier run.
fname = "foo.h5"
if isfile(fname)
    rm(fname)
end

h5open(fname, "w") do f
    # Compound datatype matching the element type of z.
    dtype = julia_to_h5py_ctype(eltype(z))
    # Dataset spanning the full array, created with "chunk" set to (2,1)
    # and "deflate" set to 6.
    filespace = dataspace(dims)
    dset = d_create(f, "cpx", dtype, filespace, "chunk", (2,1), "deflate", 6)

    # Write the array in three pieces rather than all at once. Each piece has
    # its own offset (10, 20, 30) added so it is easy to identify in the file.
    pieces = (((1:5, 1), 10), ((6:10, 1), 20), ((1:10, 2), 30))
    for (idx, offset) in pieces
        chunk = z[idx...] .+ offset
        # Space describing the in-memory piece being written.
        memspace = dataspace(chunk)
        # Destination slice selected within the overall dataset.
        slice = HDF5Dataspace(HDF5.hyperslab(dset, idx...))
        HDF5.h5d_write(dset, dtype, memspace, slice, HDF5.H5P_DEFAULT, chunk)
    end
end
# Reading slices is similar to the slice-wise writes above, so read everything
# in a single call instead.
zr = h5open(fname, "r") do f
    dset = d_open(f, "cpx")
    dtype = datatype(dset)
    # Julia complex type recovered from the dataset's stored datatype.
    T = h5py_to_julia_ctype(dtype)
    buf = zeros(T, size(dset))
    # One dataspace, built from the buffer, is passed for both space arguments.
    space = dataspace(buf)
    HDF5.h5d_read(dset, dtype, space, space, HDF5.H5P_DEFAULT, buf)
    buf
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment