quinnj · October 15, 2015 22:09
diff --git a/gistfile1.txt b/gistfile1.txt
 # Column-oriented table that is itself a `Source`. Columns are grouped by element
 # type into the typed vectors below; `index` maps a table column number to a
 # position inside the vector that holds columns of that type (see the
 # `DataTable(schema, other)` constructor in this file, which fills both).
 # NOTE(review): this type is declared as `Table{T}`, but the constructors and
 # methods in this file all refer to `DataTable`, and the parameter `T` is not used
 # by any field -- confirm the intended name/parameterization.
 type Table{T} <: Source
    schema::Schema # read elsewhere in this file via `size(schema)`, `.types`, `.cols`, `.header`
    index::Vector{Int} # index[col] = position of column `col` inside its type-specific vector
    ints::Vector{NullableVector{Int}} # columns whose schema type is `Int`
    floats::Vector{NullableVector{Float64}} # columns whose schema type is `Float64`
    ptrstrings::Vector{NullableVector{PointerString}} # columns whose schema type is `PointerString`
    strings::Vector{NullableVector{UTF8String}} # other `AbstractString` columns, stored as `UTF8String`
    dates::Vector{NullableVector{Date}} # columns whose schema type is `Date`
    datetimes::Vector{NullableVector{DateTime}} # columns whose schema type is `DateTime`
    any::Vector{NullableVector{Any}} # fallback storage for every other column type
    other::Any # sometimes you just need to keep a reference around...
 end

 # Wrapper around a dictionary keyed by `Type{T}` whose values are vectors of
 # `NullableVector{T}` columns.
 # NOTE(review): nothing else in the visible code references `D`; it looks like an
 # alternative storage layout being sketched -- confirm before relying on it.
 type D{T}
    data::Dict{Type{T},Vector{NullableVector{T}}}
 end

 # Constructors
 # Allocate an empty table for `schema`.
 #
 # One `NullableArray` with `rows` elements is allocated per column and appended to
 # the vector that matches the column's type; `index[col]` records the position of
 # column `col` inside that vector.
 #
 # Arguments:
 #   schema - supplies `(rows, cols)` through `size(schema)` and the per-column
 #            types through `schema.types`
 #   other  - stored unchanged as the last constructor argument (defaults to 0)
 #
 # Returns the result of the ten-argument `DataTable(...)` constructor call.
 function DataTable(schema::Schema,other=0)
    # allocate data
    rows, cols = size(schema)
    ints = NullableVector{Int}[]
    floats = NullableVector{Float64}[]
    ptrstrings = NullableVector{PointerString}[]
    strings = NullableVector{UTF8String}[]
    dates = NullableVector{Date}[]
    datetimes = NullableVector{DateTime}[]
    any = NullableVector{Any}[]
    index = Array(Int,cols)
    for col = 1:cols
        T = schema.types[col]
        if T == Int
            push!(ints,NullableArray(T, rows))
            index[col] = length(ints)
        elseif T == Float64
            push!(floats,NullableArray(T, rows))
            index[col] = length(floats)
        elseif T == PointerString
            # must be tested before the generic string branch below
            push!(ptrstrings,NullableArray(T, rows))
            index[col] = length(ptrstrings)
        elseif T <: AbstractString
            # every other string type is stored as UTF8String
            push!(strings,NullableArray(UTF8String, rows))
            index[col] = length(strings)
        elseif T == Date
            push!(dates,NullableArray(T, rows))
            index[col] = length(dates)
        elseif T == DateTime
            push!(datetimes,NullableArray(T, rows))
            index[col] = length(datetimes)
        else
            # Allocate with eltype `Any` so the column already has the container's
            # element type (as the string branch does with UTF8String) instead of
            # relying on an implicit NullableArray{T} -> NullableArray{Any}
            # conversion of freshly allocated storage inside `push!`.
            push!(any,NullableArray(Any, rows))
            index[col] = length(any)
        end
    end
    return DataTable(schema,index,ints,floats,ptrstrings,strings,dates,datetimes,any,other)
 end

 # Convenience constructor: wrap the column `types` and row count in a `Schema`
 # and forward it, together with `other`, to the schema-based constructor.
 function DataTable(types::Vector{DataType},rows::Int,other=0)
    return DataTable(Schema(types,rows),other)
 end

 # Convenience constructor: forward `schema(source)` to the `DataTable`
 # constructor, leaving `other` at its default.
 function DataTable(source::Source)
    return DataTable(schema(source))
 end

 # Interface

 # convert to DataFrame
 # Build a `DataFrame` from a `DataStreams.DataTable`: collect every column through
 # `DataStreams.column` (using the per-column type reported by `DataStreams.types`)
 # and name the columns after the schema header converted to symbols.
 function DataFrames.DataFrame(dt::DataStreams.DataTable)
    ncols = dt.schema.cols
    coltypes = DataStreams.types(dt)
    columns = Any[DataStreams.column(dt,k,coltypes[k]) for k = 1:ncols]
    colnames = Symbol[symbol(h) for h in dt.schema.header]
    return DataFrame(columns,colnames)
 end

 # column access
 export column
 # Return column `j` of `dt`, looked up in the storage selected by element type `T`.
 #
 # Arguments:
 #   dt - the table to read from
 #   j  - column number, valid for 1 <= j <= length(dt.index)
 #   T  - element type used to pick the `unsafe_column` method
 #
 # Throws `ArgumentError` when `j` is out of range; the actual lookup is delegated
 # to `unsafe_column`, which skips bounds checks.
 function column(dt::DataTable, j, T)
    # The message interpolates `j`: no `i` exists in this scope, so interpolating
    # it would raise UndefVarError instead of the intended ArgumentError.
    (0 < j < length(dt.index)+1) || throw(ArgumentError("column index $j out of range"))
    return unsafe_column(dt, j, T)
 end
 # Unchecked column lookup: one method per storage vector, selected by the element
 # type passed as the third argument. `dt.index[j]` is the column's position inside
 # that vector. Both index operations run under `@inbounds`, so callers must have
 # validated `j` already (see `column`).
 #
 # The integer method dispatches on `Int` (not `Int64`) so that it selects the same
 # columns the constructor files under `T == Int`; the two only differ on platforms
 # where `Int` is `Int32`.
 @inline unsafe_column(dt::DataTable, j, ::Type{Int}) = (@inbounds col = dt.ints[dt.index[j]]; return col)
 @inline unsafe_column(dt::DataTable, j, ::Type{Float64}) = (@inbounds col = dt.floats[dt.index[j]]; return col)
 @inline unsafe_column(dt::DataTable, j, ::Type{PointerString}) = (@inbounds col = dt.ptrstrings[dt.index[j]]; return col)
 # any other string type is read from the UTF8String storage
 @inline unsafe_column{T<:AbstractString}(dt::DataTable, j, ::Type{T}) = (@inbounds col = dt.strings[dt.index[j]]; return col)
 @inline unsafe_column(dt::DataTable, j, ::Type{Date}) = (@inbounds col = dt.dates[dt.index[j]]; return col)
 @inline unsafe_column(dt::DataTable, j, ::Type{DateTime}) = (@inbounds col = dt.datetimes[dt.index[j]]; return col)
 # fallback: every remaining type is read from the `any` storage
 @inline unsafe_column(dt::DataTable, j, T) = (@inbounds col = dt.any[dt.index[j]]; return col)

 # cell indexing
 # `dt[i, j]`: fetch column `j` through `column`, using the type that `types(dt)`
 # reports for that column, then index the column with `i`.
 Base.getindex(dt::DataTable, i, j) = column(dt, j, types(dt)[j])[i]
	# Column-oriented table that is itself a `Source`. Columns are grouped by element
	# type into the typed vectors below; `index` maps a table column number to a
	# position inside the vector that holds columns of that type (see the
	# `DataTable(schema, other)` constructor in this file, which fills both).
	# NOTE(review): this type is declared as `Table{T}`, but the constructors and
	# methods in this file all refer to `DataTable`, and the parameter `T` is not used
	# by any field -- confirm the intended name/parameterization.
	type Table{T} <: Source
	schema::Schema # read elsewhere in this file via `size(schema)`, `.types`, `.cols`, `.header`
	index::Vector{Int} # index[col] = position of column `col` inside its type-specific vector
	ints::Vector{NullableVector{Int}} # columns whose schema type is `Int`
	floats::Vector{NullableVector{Float64}} # columns whose schema type is `Float64`
	ptrstrings::Vector{NullableVector{PointerString}} # columns whose schema type is `PointerString`
	strings::Vector{NullableVector{UTF8String}} # other `AbstractString` columns, stored as `UTF8String`
	dates::Vector{NullableVector{Date}} # columns whose schema type is `Date`
	datetimes::Vector{NullableVector{DateTime}} # columns whose schema type is `DateTime`
	any::Vector{NullableVector{Any}} # fallback storage for every other column type
	other::Any # sometimes you just need to keep a reference around...
	end

	# Wrapper around a dictionary keyed by `Type{T}` whose values are vectors of
	# `NullableVector{T}` columns.
	# NOTE(review): nothing else in the visible code references `D`; it looks like an
	# alternative storage layout being sketched -- confirm before relying on it.
	type D{T}
	data::Dict{Type{T},Vector{NullableVector{T}}}
	end

	# Constructors
	# Allocate an empty table for `schema`.
	#
	# One `NullableArray` with `rows` elements is allocated per column and appended to
	# the vector that matches the column's type; `index[col]` records the position of
	# column `col` inside that vector.
	#
	# Arguments:
	#   schema - supplies `(rows, cols)` through `size(schema)` and the per-column
	#            types through `schema.types`
	#   other  - stored unchanged as the last constructor argument (defaults to 0)
	#
	# Returns the result of the ten-argument `DataTable(...)` constructor call.
	function DataTable(schema::Schema,other=0)
	    # allocate data
	    rows, cols = size(schema)
	    ints = NullableVector{Int}[]
	    floats = NullableVector{Float64}[]
	    ptrstrings = NullableVector{PointerString}[]
	    strings = NullableVector{UTF8String}[]
	    dates = NullableVector{Date}[]
	    datetimes = NullableVector{DateTime}[]
	    any = NullableVector{Any}[]
	    index = Array(Int,cols)
	    for col = 1:cols
	        T = schema.types[col]
	        if T == Int
	            push!(ints,NullableArray(T, rows))
	            index[col] = length(ints)
	        elseif T == Float64
	            push!(floats,NullableArray(T, rows))
	            index[col] = length(floats)
	        elseif T == PointerString
	            # must be tested before the generic string branch below
	            push!(ptrstrings,NullableArray(T, rows))
	            index[col] = length(ptrstrings)
	        elseif T <: AbstractString
	            # every other string type is stored as UTF8String
	            push!(strings,NullableArray(UTF8String, rows))
	            index[col] = length(strings)
	        elseif T == Date
	            push!(dates,NullableArray(T, rows))
	            index[col] = length(dates)
	        elseif T == DateTime
	            push!(datetimes,NullableArray(T, rows))
	            index[col] = length(datetimes)
	        else
	            # Allocate with eltype `Any` so the column already has the container's
	            # element type (as the string branch does with UTF8String) instead of
	            # relying on an implicit NullableArray{T} -> NullableArray{Any}
	            # conversion of freshly allocated storage inside `push!`.
	            push!(any,NullableArray(Any, rows))
	            index[col] = length(any)
	        end
	    end
	    return DataTable(schema,index,ints,floats,ptrstrings,strings,dates,datetimes,any,other)
	end

	# Convenience constructor: wrap the column `types` and row count in a `Schema`
	# and forward it, together with `other`, to the schema-based constructor.
	function DataTable(types::Vector{DataType},rows::Int,other=0)
	    return DataTable(Schema(types,rows),other)
	end

	# Convenience constructor: forward `schema(source)` to the `DataTable`
	# constructor, leaving `other` at its default.
	function DataTable(source::Source)
	    return DataTable(schema(source))
	end

	# Interface

	# convert to DataFrame
	# Build a `DataFrame` from a `DataStreams.DataTable`: collect every column through
	# `DataStreams.column` (using the per-column type reported by `DataStreams.types`)
	# and name the columns after the schema header converted to symbols.
	function DataFrames.DataFrame(dt::DataStreams.DataTable)
	    ncols = dt.schema.cols
	    coltypes = DataStreams.types(dt)
	    columns = Any[DataStreams.column(dt,k,coltypes[k]) for k = 1:ncols]
	    colnames = Symbol[symbol(h) for h in dt.schema.header]
	    return DataFrame(columns,colnames)
	end

	# column access
	export column
	# Return column `j` of `dt`, looked up in the storage selected by element type `T`.
	#
	# Arguments:
	#   dt - the table to read from
	#   j  - column number, valid for 1 <= j <= length(dt.index)
	#   T  - element type used to pick the `unsafe_column` method
	#
	# Throws `ArgumentError` when `j` is out of range; the actual lookup is delegated
	# to `unsafe_column`, which skips bounds checks.
	function column(dt::DataTable, j, T)
	    # Two repairs on this line: the short-circuit operator is written as a plain
	    # `||` (the backslash-escaped form is not valid Julia), and the message
	    # interpolates `j` because no `i` exists in this scope.
	    (0 < j < length(dt.index)+1) || throw(ArgumentError("column index $j out of range"))
	    return unsafe_column(dt, j, T)
	end
	# Unchecked column lookup: one method per storage vector, selected by the element
	# type passed as the third argument. `dt.index[j]` is the column's position inside
	# that vector. Both index operations run under `@inbounds`, so callers must have
	# validated `j` already (see `column`).
	#
	# The integer method dispatches on `Int` (not `Int64`) so that it selects the same
	# columns the constructor files under `T == Int`; the two only differ on platforms
	# where `Int` is `Int32`.
	@inline unsafe_column(dt::DataTable, j, ::Type{Int}) = (@inbounds col = dt.ints[dt.index[j]]; return col)
	@inline unsafe_column(dt::DataTable, j, ::Type{Float64}) = (@inbounds col = dt.floats[dt.index[j]]; return col)
	@inline unsafe_column(dt::DataTable, j, ::Type{PointerString}) = (@inbounds col = dt.ptrstrings[dt.index[j]]; return col)
	# any other string type is read from the UTF8String storage
	@inline unsafe_column{T<:AbstractString}(dt::DataTable, j, ::Type{T}) = (@inbounds col = dt.strings[dt.index[j]]; return col)
	@inline unsafe_column(dt::DataTable, j, ::Type{Date}) = (@inbounds col = dt.dates[dt.index[j]]; return col)
	@inline unsafe_column(dt::DataTable, j, ::Type{DateTime}) = (@inbounds col = dt.datetimes[dt.index[j]]; return col)
	# fallback: every remaining type is read from the `any` storage
	@inline unsafe_column(dt::DataTable, j, T) = (@inbounds col = dt.any[dt.index[j]]; return col)

	# cell indexing
	# `dt[i, j]`: fetch column `j` through `column`, using the type that `types(dt)`
	# reports for that column, then index the column with `i`.
	Base.getindex(dt::DataTable, i, j) = column(dt, j, types(dt)[j])[i]