Last active
August 29, 2015 14:11
-
-
Save simonbyrne/3ebdcc6298b1661be19c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Modified version of https://gist.github.com/tshort/9b872f0cd12760d9563d
# * Tuple approach uses `Field{:a}()` instead of `Field(:a)` (including in the length of the loop).
# * Column `b` is changed to `Int`.
module SimonDataFrame

# Extend Base's `getindex` rather than defining a module-local function of the
# same name, so that the `df[...]` indexing syntax reaches the methods below
# while ordinary tuple/array indexing inside them keeps working.
import Base: getindex

# Column store. `N` is the tuple of column names (symbols) and `D` the tuple of
# column types, both built by the keyword constructor below; `data` holds the
# columns themselves as a tuple in the same order as `N`.
type DataFrame{N,D}
    data::D
end

# Empty tag type carrying a column name `s` as a type parameter, so a column
# can be selected through dispatch (`df[Field{:a}()]`).
immutable Field{s}
end

# Convenience constructor: `Field(:a)` builds `Field{:a}()`.
Field(s::Symbol) = Field{s}()

# Build a DataFrame from keyword arguments, one column per keyword, e.g.
# `DataFrame(a = [1.0, 2.0], b = [1, 2])`. Column order follows keyword order.
function DataFrame(; kwds...)
    names = Any[]
    columns = Any[]
    for (name, column) in kwds
        push!(names, name)
        push!(columns, column)
    end
    N = tuple(names...)
    D = tuple(map(typeof, columns)...)
    return DataFrame{N,D}(tuple(columns...))
end

# Position of column `s` inside the name tuple `names`. Called while the staged
# methods below generate their code. Throws `KeyError(s)` for an unknown column.
function columnindex(names, s)
    for (j, name) in enumerate(names)
        name == s && return j
    end
    throw(KeyError(s))
end

# `df[Field{:a}()]`: return the whole column. The column position is resolved
# when the method is generated, so the emitted code is a fixed tuple access.
stagedfunction getindex{N,D,s}(d::DataFrame{N,D}, f::Field{s})
    j = columnindex(N, s)
    :(d.data[$j])
end

# `df[i, Field{:a}()]`: return element `i` of the selected column.
stagedfunction getindex{N,D,s}(d::DataFrame{N,D}, i::Integer, f::Field{s})
    j = columnindex(N, s)
    :(d.data[$j][i])
end

# `df[i]`: return row `i` as a tuple holding one entry per column.
stagedfunction getindex{N,D}(d::DataFrame{N,D}, i::Integer)
    Expr(:tuple, [:(d.data[$j][i]) for j in 1:length(D)]...)
end

# Symbol-based accessors: wrap the symbol in a `Field` and forward to the
# staged methods above. The row index accepts any `Integer`, matching them.
getindex(d::DataFrame, s::Symbol) = d[Field(s)]
getindex(d::DataFrame, i::Integer, s::Symbol) = d[i, Field(s)]

# Benchmark data: fixed seed, two columns of `n` entries each.
srand(1)
const n = 5_000_000
a = rand(n)
b = round(Int, rand(n))
sdf = DataFrame(a = a, b = b)

# Sum of squares of column `a`: fetch the column by Symbol, then index it.
function dot1(df::DataFrame)
    x = 0.0
    for i in 1:length(df[:a])
        x += df[:a][i] * df[:a][i]
    end
    return x
end

# Sum of squares of column `a`: element access by (row, Symbol).
function dot2(df::DataFrame)
    x = 0.0
    for i in 1:length(df[:a])
        x += df[i, :a] * df[i, :a]
    end
    return x
end

# Sum of squares of column `a`: fetch the column by `Field{:a}()`, then index it.
function dot3(df::DataFrame)
    x = 0.0
    for i in 1:length(df[Field{:a}()])
        x += df[Field{:a}()][i] * df[Field{:a}()][i]
    end
    return x
end

# Sum of squares of column `a`: element access by (row, `Field{:a}()`).
function dot4(df::DataFrame)
    x = 0.0
    for i in 1:length(df[Field{:a}()])
        x += df[i, Field{:a}()] * df[i, Field{:a}()]
    end
    return x
end

sdf[1, Field(:a)]

# Run every variant once before timing so the measurements below do not
# include the compilation triggered by a function's first call.
dot1(sdf)
dot2(sdf)
dot3(sdf)
dot4(sdf)

@show t1 = @elapsed dot1(sdf)
@show t2 = @elapsed dot2(sdf)
@show t3 = @elapsed dot3(sdf)
@show t4 = @elapsed dot4(sdf)

end
module CompositeDataFramesTimings

using DataArrays, DataFrames
using DataFramesMeta ## NOTE: MUST BE THE DEVELOPMENT VERSION

# Benchmark data: fixed seed, two columns of `n` entries each, stored once as
# a CompositeDataFrame and once as a DataFrame built from it.
srand(1)
const n = 5_000_000
a = rand(n)
b = round(Int, rand(n))
cdf = CompositeDataFrame(a = a, b = b)
df = DataFrame(cdf)

# Sum of squares of column `a`: fetch the column by Symbol, then index it.
function dot1(df::AbstractDataFrame)
    x = 0.0
    for i in 1:size(df, 1)
        x += df[:a][i] * df[:a][i]
    end
    return x
end

# Sum of squares of column `a`: element access by (row, Symbol).
function dot2(df::AbstractDataFrame)
    x = 0.0
    for i in 1:size(df, 1)
        x += df[i, :a] * df[i, :a]
    end
    return x
end

# Run every function/argument combination once before timing so the
# measurements below do not include the compilation triggered by a first call.
# The warm-up uses the very objects that get timed, so exactly the code being
# measured is compiled.
dot1(df)
dot2(df)
dot1(cdf)
dot2(cdf)

@show c1 = @elapsed dot1(df)
@show c2 = @elapsed dot2(df)
@show c1c = @elapsed dot1(cdf)
@show c2c = @elapsed dot2(cdf)

end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment