Skip to content

Instantly share code, notes, and snippets.

@nalimilan
Created July 11, 2017 18:37
Show Gist options
  • Save nalimilan/bf67ade5af82175954365085155d3870 to your computer and use it in GitHub Desktop.
Save nalimilan/bf67ade5af82175954365085155d3870 to your computer and use it in GitHub Desktop.
using DataTables, StatsBase
"""
    random_frame(nrow::Int, col_values::Dict{Symbol, Any})

Build a `DataTable` with one column per entry of `col_values`.

Each column holds `nrow` values drawn with `sample` from the collection stored
under that key. When the source collection is a `CategoricalArray` the drawn
values are wrapped with `categorical`; otherwise they are wrapped in a
`NullableArray`. Columns are named after the keys and appear in the iteration
order of `keys(col_values)`.
"""
function random_frame(nrow::Int, col_values::Dict{Symbol, Any})
    colnames = collect(keys(col_values))
    columns = Any[]
    for colname in colnames
        pool = col_values[colname]
        drawn = sample(pool, nrow)
        if isa(pool, CategoricalArray)
            push!(columns, categorical(drawn))
        else
            push!(columns, NullableArray(drawn))
        end
    end
    return DataTable(columns, colnames)
end
"""
    random_join(kind, nrow_left, nrow_right,
                on_col_values, left_col_values, right_col_values)

Generate two random tables with `random_frame` and join them.

The left table has `nrow_left` rows and is built from `on_col_values` merged
with `left_col_values`; the right table has `nrow_right` rows and is built from
`on_col_values` merged with `right_col_values`. The tables are joined on the
keys of `on_col_values` using the join type `kind`, and the result of `join` is
returned.
"""
function random_join(kind::Symbol, nrow_left::Int, nrow_right::Int,
                     on_col_values::Dict{Symbol, Any},
                     left_col_values::Dict{Symbol, Any},
                     right_col_values::Dict{Symbol, Any})
    left_spec = merge(on_col_values, left_col_values)
    right_spec = merge(on_col_values, right_col_values)
    # The left table is generated before the right one, so random draws
    # happen in the same order on every call.
    left_table = random_frame(nrow_left, left_spec)
    right_table = random_frame(nrow_right, right_spec)
    join_cols = collect(keys(on_col_values))
    return join(left_table, right_table, on = join_cols, kind = kind)
end
"""
    f(n::Int)

Run `n` random outer joins between a 1000-row and a 2000-row table and discard
the results.

Both tables share the join columns `:A`, `:B`, `:C` and `:D`; the left table
additionally has `:E` and `:F`, the right table `:G` and `:H`. The column
specifications are rebuilt on every iteration.
"""
function f(n::Int)
    for trial in 1:n
        on_cols = Dict{Symbol,Any}(:A => 1:10,
                                   :B => NullableArray([:A, :B, :C, :D]),
                                   :C => 1:10,
                                   :D => 1:10)
        left_cols = Dict{Symbol,Any}(:E => 1:10,
                                     :F => NullableArray([:A, :B, :C, :D]))
        right_cols = Dict{Symbol,Any}(:G => 1:10,
                                      :H => NullableArray([:A, :B, :C, :D]))
        random_join(:outer, 1000, 2000, on_cols, left_cols, right_cols)
    end
    return nothing
end
# Call `f` once before timing; presumably a warm-up so that first-call
# compilation is not included in the measurement below.
f(1)
# Time 100 iterations of the random outer join performed by `f`.
@time f(100)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment