Skip to content

Instantly share code, notes, and snippets.

@jrevels
Last active August 29, 2015 14:23
Show Gist options
  • Save jrevels/d28b7e06684740b269f3 to your computer and use it in GitHub Desktop.
Save jrevels/d28b7e06684740b269f3 to your computer and use it in GitHub Desktop.
# Return the `parameters` of the type `T`, e.g. the entries of a
# `Tuple{...}` type.
function params{T}(::Type{T})
    return T.parameters
end
# Parameterize column types and fields by utilizing tuples:
#
#   C - tuple type of the column containers
#   F - tuple type whose parameters are the field names,
#       e.g. Tuple{:numbers,:letters}
#
# The inner constructor verifies that all columns have the same length and
# that there is exactly one field per column; a violation raises an
# AssertionError describing the mismatch. An empty column tuple is accepted
# (together with an empty field tuple type).
type DataFrame{C<:Tuple, F<:Tuple}
    columns::C
    fields::Type{F}
    function DataFrame(columns::C, fields::Type{F})
        # Only compare lengths when there is at least one column;
        # `first` cannot be applied to an empty tuple.
        if !isempty(columns)
            nrows = length(first(columns))
            for i in 2:length(columns)
                # Thrown explicitly (rather than via @assert) so the check
                # carries a diagnostic message and is always performed.
                length(columns[i]) == nrows || throw(AssertionError(
                    "column $i has length $(length(columns[i])), expected $nrows"))
            end
        end
        nfields = length(params(fields))
        length(columns) == nfields || throw(AssertionError(
            "got $(length(columns)) columns but $nfields fields"))
        return new(columns, fields)
    end
end
# Outer constructor: takes the type parameters `C` and `F` from the
# arguments and forwards to the parameterized constructor.
function DataFrame{C<:Tuple,F<:Tuple}(columns::C, fields::Type{F})
    return DataFrame{C,F}(columns, fields)
end
# This macro provides a type-stable
# way to construct DataFrames using
# a syntax similar to the original
# kwargs syntax. Usage:
#
# julia> df = @dframe(:numbers = collect(1:10), :letters = 'a':'j')
# DataFrame{Tuple{Array{Int64,1},StepRange{Char,Int64}},Tuple{:numbers,:letters}}(([1,2,3,4,5,6,7,8,9,10],'a':1:'j'),Tuple{:numbers,:letters})
#
# Every argument must have the form `:field = column`. The column
# expressions are escaped so that they are evaluated in the caller's
# scope (and may therefore refer to the caller's local variables), while
# `DataFrame` itself is resolved where this macro is defined.
# An argument of any other shape raises an ArgumentError at expansion time.
macro dframe(kwargs...)
    for kw in kwargs
        wellformed = isa(kw, Expr) &&
                     (kw.head == :(=) || kw.head == :kw) &&
                     length(kw.args) == 2
        wellformed || throw(ArgumentError(
            "@dframe expects arguments of the form `:field = column`, got `$kw`"))
    end
    fields = map(kw -> kw.args[1], kwargs)
    # esc: without it, hygiene would treat names in the column expressions
    # as globals of the macro's module instead of the caller's variables.
    cols = Expr(:tuple, map(kw -> esc(kw.args[2]), kwargs)...)
    return :(DataFrame($cols, Tuple{$(fields...)}))
end
# Type-stable and "type-specific"
# method to index into columns and
# rows by both field and value.
# API:
#
# df[Field{:f}] -> retrieve column with field :f
# df[Col{j}] -> retrieve the jth column
#
# In the below, i can be any index (scalar, range, array, etc.):
#
# df[i, Field{:f}] -> retrieve the ith member(s) of the column
# df[i, Col{j}] -> retrieve the ith member(s) of the jth column
# df[i, :] -> retrieve the entire ith row(s) as a DataFrame
# Index tag for selecting a column by field name; the parameter `f` is
# the field, e.g. Field{:numbers}. Used as a type, e.g. df[Field{:f}].
abstract Field{f}
# Index tag for selecting a column by position; the parameter `j` is
# the column number, e.g. Col{2}. Used as a type, e.g. df[Col{j}].
abstract Col{j}
# Retrieve the column with field `f`: df[Field{:f}].
#
# The position of `f` among the parameters of `F` is looked up while the
# method body is generated, so the emitted code is a direct index into
# the column tuple. If `f` is not one of the fields, the emitted code
# throws a KeyError naming the field rather than indexing position 0.
@generated function Base.getindex{C,F,f}(d::DataFrame{C,F}, ::Type{Field{f}})
    j = findfirst(params(F), f)
    if j == 0
        # `findfirst` reports "not found" as 0.
        return :(throw(KeyError($(QuoteNode(f)))))
    end
    return :(d.columns[$j])
end
# Retrieve the jth column: df[Col{j}].
# The column number `j` is spliced into the emitted code as a literal.
@generated function Base.getindex{C,F,j}(d::DataFrame{C,F}, ::Type{Col{j}})
    colnumber = j
    return :(getindex(d.columns, $colnumber))
end
# Retrieve the entire ith row(s) as a DataFrame: df[i, :].
# Emits one `d[i, Col{k}]` lookup per column, collects the results in a
# tuple, and wraps them in a DataFrame that reuses the fields of `d`.
@generated function Base.getindex{C}(d::DataFrame{C}, i, ::Colon)
    ncols = length(params(C))
    lookups = Any[]
    for k in 1:ncols
        push!(lookups, :(d[i, Col{$k}]))
    end
    rowcols = Expr(:tuple, lookups...)
    return :(DataFrame($rowcols, d.fields))
end
# Retrieve the ith member(s) of the column with field `f`: df[i, Field{:f}].
function Base.getindex{f}(d::DataFrame, i, field::Type{Field{f}})
    column = d[field]
    return column[i]
end
# Retrieve the ith member(s) of the jth column: df[i, Col{j}].
function Base.getindex{j}(d::DataFrame, i, col::Type{Col{j}})
    column = d[col]
    return column[i]
end
# Type-stable method for adding a column: hcat(df, v, Field{:f}).
# The DataFrame implementation used here cannot be extended in-place,
# so this makes a new DataFrame whose columns are those of `d` followed
# by `v`, with `f` appended to the fields.
@generated function Base.hcat{C,T,F,f}(d::DataFrame{C,F}, v::T, ::Type{Field{f}})
    # Extended field and column tuple types, computed when the method
    # body is generated and spliced into the emitted constructor call.
    newfields = Tuple{params(F)..., f}
    newcoltypes = Tuple{params(C)..., T}
    return quote
        allcols = (d.columns..., v)
        DataFrame{$newcoltypes,$newfields}(allcols, $newfields)
    end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment