Created
March 15, 2013 04:52
-
-
Save justinko/5167583 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'sequel' | |
require 'csv' | |
def parse_data(file_name) | |
CSV.read(file_name, headers: :first_row).to_a | |
end | |
db = Sequel.sqlite | |
db.create_table! :players do | |
string :id | |
integer :birth_year | |
string :first_name | |
string :last_name | |
index :id | |
end | |
db.create_table! :statistics do | |
string :player_id | |
integer :year | |
string :team | |
integer :games | |
integer :at_bats | |
integer :runs | |
integer :hits | |
integer :doubles | |
integer :triples | |
integer :home_runs | |
integer :runs_batted_in | |
integer :stolen_bases | |
integer :caught_stealing | |
index :player_id | |
end | |
db[:players].import db[:players].columns, parse_data('Master-small.csv') | |
db[:statistics].import db[:statistics].columns, parse_data('Batting-07-12.csv') | |
data = db[<<-SQL].all | |
SELECT | |
(p.first_name || ' ' || p.last_name) AS name | |
,s.player_id | |
,SUM(IFNULL(s_2009.at_bats, 0)) [2009_at_bats] | |
,SUM(IFNULL(s_2009.hits, 0)) [2009_hits] | |
,SUM(IFNULL(s_2010.at_bats, 0)) [2010_at_bats] | |
,SUM(IFNULL(s_2010.hits, 0)) [2010_hits] | |
,ROUND(SUM(IFNULL(s_2009.batting_average, 0.0)), 4) [2009_batting_average] | |
,ROUND(SUM(IFNULL(s_2010.batting_average, 0.0)), 4) [2010_batting_average] | |
,ROUND(SUM(IFNULL(s_2010.batting_average, 0.0)) - SUM(IFNULL(s_2009.batting_average, 0.0)), 4) batting_average_difference | |
FROM statistics s | |
LEFT OUTER JOIN ( | |
SELECT | |
player_id | |
,year | |
,SUM(at_bats) as at_bats | |
,SUM(hits) as hits | |
,SUM(CAST(hits AS REAL)) / SUM(at_bats) as batting_average | |
FROM statistics | |
WHERE year = 2009 | |
GROUP BY player_id, year | |
) s_2009 ON (s_2009.Player_id = s.Player_id AND s_2009.year = s.year) | |
LEFT OUTER JOIN ( | |
SELECT | |
player_id | |
,year | |
,SUM(at_bats) as at_bats | |
,SUM(hits) as hits | |
,SUM(CAST(hits AS REAL)) / SUM(at_bats) as batting_average | |
FROM statistics | |
WHERE year = 2010 | |
GROUP BY player_id, year | |
) s_2010 ON s_2010.player_id = s.player_id and s_2010.year = s.year | |
LEFT OUTER JOIN players p ON p.id = s.player_id | |
GROUP BY s.player_id | |
HAVING SUM(s_2009.at_bats) >= 200 AND SUM(s_2010.at_bats) >= 200 | |
ORDER BY batting_average_difference DESC | |
SQL | |
data.each do |stat| | |
puts [ | |
"#{stat[:name]} (#{stat[:player_id]})".ljust(37), | |
"2009 AB: #{stat[:'2009_at_bats']}", | |
"2009 H: #{stat[:'2009_hits']}", | |
"2010 AB: #{stat[:'2010_at_bats']}", | |
"2010 H: #{stat[:'2010_hits']}", | |
"2009 BA: #{stat[:'2009_batting_average']}", | |
"2010 BA: #{stat[:'2010_batting_average']}", | |
"BA DIFF: #{stat[:'batting_average_difference']}" | |
].map {|str| str.ljust(20) }.join | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment