Skip to content

Instantly share code, notes, and snippets.

@mrkn
Last active June 30, 2017 02:01
Show Gist options
  • Save mrkn/47c25c3226959e0c0775fc3973c08e2a to your computer and use it in GitHub Desktop.
Save mrkn/47c25c3226959e0c0775fc3973c08e2a to your computer and use it in GitHub Desktop.
require 'daru'
require 'tempfile'
require 'open-uri'
# Downloads a tab-separated log and loads its TABLE rows into a Daru frame.
#
# Only lines that begin with "TABLE" are used; they are expected to look like
# "TABLE\t<category>\t<x>\t<y>". The leading tag is dropped and the next three
# fields are staged in a temporary TSV file (header row: category, x, y),
# which Daru then parses by path.
#
# @param url [String, #open] location of the raw log; anything URI.open accepts
# @return [Daru::DataFrame] the frame parsed from the staged TSV, after
#   to_category has been applied to its 'category' vector
def read_as_dataframe(url)
  Tempfile.open('hash_bench') do |tmpfile|
    tmpfile.puts "category\tx\ty"

    # URI.open (from open-uri) rather than Kernel#open: Kernel#open no longer
    # fetches URLs on Ruby 3.0 and later.
    URI.open(url) do |io|
      io.each_line do |line|
        next unless line.start_with?('TABLE')

        # chomp first so the last field never carries the line terminator.
        _tag, category, x, y = line.chomp.split("\t")
        tmpfile.puts "#{category}\t#{x}\t#{y}"
      end
    end

    # Daru opens the file again by path, so rows still sitting in this
    # handle's write buffer must be pushed out before it reads.
    tmpfile.flush

    Daru::DataFrame.from_csv(tmpfile.path, col_sep: "\t").tap do |df|
      df.to_category 'category'
    end
  end
end
# Fetch the measurements as one long frame with 'category', 'x' and 'y'.
measurements = read_as_dataframe('http://www.atdot.net/sp/raw/c5n8fo')

# For every category, select its rows, keep only 'x' and 'y', and rename 'y'
# to the category so each resulting frame carries a distinctly named column.
per_category = measurements['category'].categories.map do |name|
  subset = measurements.where(measurements['category'].eq(name))['x', 'y']
  # rename_vectors is called for its effect on subset; the frame itself is
  # what gets collected.
  subset.rename_vectors('y' => name)
  subset
end

# Fold the per-category frames together with an inner join on 'x'.
df = per_category.reduce do |joined, frame|
  joined.join(frame, how: :inner, on: ['x'])
end

p df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment