Skip to content

Instantly share code, notes, and snippets.

@justinko
Created March 15, 2013 04:52
Show Gist options
  • Save justinko/5167583 to your computer and use it in GitHub Desktop.
Save justinko/5167583 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'sequel'
require 'csv'
def parse_data(file_name)
CSV.read(file_name, headers: :first_row).to_a
end
db = Sequel.sqlite
db.create_table! :players do
string :id
integer :birth_year
string :first_name
string :last_name
index :id
end
db.create_table! :statistics do
string :player_id
integer :year
string :team
integer :games
integer :at_bats
integer :runs
integer :hits
integer :doubles
integer :triples
integer :home_runs
integer :runs_batted_in
integer :stolen_bases
integer :caught_stealing
index :player_id
end
db[:players].import db[:players].columns, parse_data('Master-small.csv')
db[:statistics].import db[:statistics].columns, parse_data('Batting-07-12.csv')
data = db[<<-SQL].all
SELECT
(p.first_name || ' ' || p.last_name) AS name
,s.player_id
,SUM(IFNULL(s_2009.at_bats, 0)) [2009_at_bats]
,SUM(IFNULL(s_2009.hits, 0)) [2009_hits]
,SUM(IFNULL(s_2010.at_bats, 0)) [2010_at_bats]
,SUM(IFNULL(s_2010.hits, 0)) [2010_hits]
,ROUND(SUM(IFNULL(s_2009.batting_average, 0.0)), 4) [2009_batting_average]
,ROUND(SUM(IFNULL(s_2010.batting_average, 0.0)), 4) [2010_batting_average]
,ROUND(SUM(IFNULL(s_2010.batting_average, 0.0)) - SUM(IFNULL(s_2009.batting_average, 0.0)), 4) batting_average_difference
FROM statistics s
LEFT OUTER JOIN (
SELECT
player_id
,year
,SUM(at_bats) as at_bats
,SUM(hits) as hits
,SUM(CAST(hits AS REAL)) / SUM(at_bats) as batting_average
FROM statistics
WHERE year = 2009
GROUP BY player_id, year
) s_2009 ON (s_2009.Player_id = s.Player_id AND s_2009.year = s.year)
LEFT OUTER JOIN (
SELECT
player_id
,year
,SUM(at_bats) as at_bats
,SUM(hits) as hits
,SUM(CAST(hits AS REAL)) / SUM(at_bats) as batting_average
FROM statistics
WHERE year = 2010
GROUP BY player_id, year
) s_2010 ON s_2010.player_id = s.player_id and s_2010.year = s.year
LEFT OUTER JOIN players p ON p.id = s.player_id
GROUP BY s.player_id
HAVING SUM(s_2009.at_bats) >= 200 AND SUM(s_2010.at_bats) >= 200
ORDER BY batting_average_difference DESC
SQL
data.each do |stat|
puts [
"#{stat[:name]} (#{stat[:player_id]})".ljust(37),
"2009 AB: #{stat[:'2009_at_bats']}",
"2009 H: #{stat[:'2009_hits']}",
"2010 AB: #{stat[:'2010_at_bats']}",
"2010 H: #{stat[:'2010_hits']}",
"2009 BA: #{stat[:'2009_batting_average']}",
"2010 BA: #{stat[:'2010_batting_average']}",
"BA DIFF: #{stat[:'batting_average_difference']}"
].map {|str| str.ljust(20) }.join
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment