Here is an extension to JLD that attempts to support reading DataFrames. Writing already seems to work.
import HDF5.read, JLD.JldFile, JLD.getrefs
# Read a JLD dataset back into a data frame of type `D`.
#
# `getrefs(obj, Any)` is expected to yield an indexable collection whose first
# element holds the keys and whose second element holds the matching values
# (presumably the column names and the column data -- confirm against how JLD
# writes an AbstractDataFrame). An empty `D()` is built and filled one
# key/value pair at a time via `setindex!`.
#
# Arguments:
#   obj      -- the JLD-backed HDF5 dataset to read from
#   ::Type{D} -- the concrete AbstractDataFrame subtype to construct
#
# Returns the populated instance of `D`.
function read{D<:AbstractDataFrame}(obj::HDF5Dataset{JldFile}, ::Type{D})
    refs = getrefs(obj, Any)
    colnames = refs[1]
    columns = refs[2]
    frame = D()
    # Pair each key with its value; zip stops at the shorter of the two.
    for (name, column) in zip(colnames, columns)
        frame[name] = column
    end
    return frame
end
Here is an example of its use:
julia> d = {:a => [1:5], :b => pi*[2:6]}
{:b=>[6.28319, 9.42478, 12.5664, 15.708, 18.8496],:a=>[1, 2, 3, 4, 5]}
julia> df = DataFrame({[2:6], pi*[1:5]}) # arrays only
5x2 DataFrame:
x1 x2
[1,] 2 3.14159
[2,] 3 6.28319
[3,] 4 9.42478
[4,] 5 12.5664
[5,] 6 15.708
julia> df2 = @DataFrame(a => [1:5], b => pi * [1:5]) # DataArray columns
5x2 DataFrame:
a b
[1,] 1 3.14159
[2,] 2 6.28319
[3,] 3 9.42478
[4,] 4 12.5664
[5,] 5 15.708
julia> file = jldopen("mydata.jld", "w")
Julia data file version 0.0.1: mydata.jld
julia> write(file, "d", d)
julia> write(file, "df", df)
julia> write(file, "df2", df2)
julia> close(file)
julia> file = jldopen("mydata.jld", "r")
Julia data file version 0.0.1: mydata.jld
julia> x = read(file, "d")
{:b=>[6.28319, 9.42478, 12.5664, 15.708, 18.8496],:a=>[1, 2, 3, 4, 5]}
julia> x = read(file, "df")
HDF5-DIAG: Error detected in HDF5 (1.8.4-patch1) thread 139749181699904:
#000: ../../../src/H5A.c line 546 in H5Aopen(): unable to load attribute info from object header
major: Attribute
minor: Unable to initialize object
#001: ../../../src/H5Oattribute.c line 533 in H5O_attr_open_by_name(): can't locate attribute
major: Attribute
minor: Object not found
HDF5-DIAG: Error detected in HDF5 (1.8.4-patch1) thread 139749181699904:
#000: ../../../src/H5A.c line 1385 in H5Aget_name(): not an attribute
major: Invalid arguments to routine
minor: Inappropriate type
ERROR: Error getting attribute name
in h5a_get_name at /home/tshort/.julia/HDF5/src/plain.jl:1699
in h5a_get_name at /home/tshort/.julia/HDF5/src/plain.jl:1749
in h5a_open at /home/tshort/.julia/HDF5/src/plain.jl:1699
in a_read at /home/tshort/.julia/HDF5/src/plain.jl:922
in read at /home/tshort/.julia/HDF5/src/jld.jl:330
in read at /home/tshort/.julia/HDF5/src/jld.jl:176
in read at /home/tshort/.julia/HDF5/src/plain.jl:934
julia> read(file["df"], DataFrame) # works for Array columns
5x2 DataFrame:
x1 x2
[1,] 2 3.14159
[2,] 3 6.28319
[3,] 4 9.42478
[4,] 5 12.5664
[5,] 6 15.708
julia> read(file["df2"], DataFrame) # broken for DataArray columns
ERROR: no method start(Symbol,)
in read at /home/tshort/.julia/HDF5/src/jld.jl:333
in read at /home/tshort/.julia/HDF5/src/jld.jl:176
in getrefs at /home/tshort/.julia/HDF5/src/jld.jl:358
in read at /home/tshort/.julia/HDF5/src/jld.jl:302
in read at /home/tshort/.julia/HDF5/src/jld.jl:176
in getrefs at /home/tshort/.julia/HDF5/src/jld.jl:358
in read at /tmp/julia_eval_region.jl:4