Skip to content

Instantly share code, notes, and snippets.

@bdilday
bdilday / pitching.metrics.ranks.csv
Last active January 21, 2017 01:21
comparison of pitching metrics from baseball reference & bpro
We can't make this file beautiful and searchable because it's too large.
"YEAR","NAME","RANK","LVL","AGE","G","GS","PITCHES","IP","IPStart","IPRelief","FIP","cFIP","ERA","DRA","PWARP","player_ID","WAR","WAA","IPouts","lg.dra","lg.era","lg.fip","lg.cfip","dra.minus","era.minus","fip.minus","cfip.minus","pwarp.per.inning","war.per.inning","era.rank","era.m.rank","dra.rank","dra.m.rank","fip.rank","fip.m.rank","cfip.rank","cfip.m.rank","pwarp.rank","pwarp.per.inning.rank","war.rank","war.per.inning.rank"
1968,"Bob Gibson",4495,"MLB",32,34,34,0,304.7,"304.7","0.0",1.93,69,1.12,1.9,8.51,"gibsobo01",11.24,8.6428,914,3.1796935736251,2.80775061774639,2.84043976293038,95.4706774366999,0.597541856158753,0.398895825334714,0.679472251159055,0.722734999400724,0.0279291106005907,0.0368927789934354,1,2,25,118,6,90,163,189,189,376,6,31
1985,"Dwight Gooden",4506,"MLB",20,35,35,0,276.7,"276.7","0.0",2.06,58,1.53,1.98,10.61,"goodedw01",12.15,9.7825,830,4.0327490848567,3.59918578955359,3.7396735848314,96.9331481491894,0.490980211844834,0.425096143811394,0.550850215472182,0.598350524123413,0.038344777
@bdilday
bdilday / march_madness_nodes.csv
Created February 21, 2017 19:25
march madness bracket tree
game_number seed_high seed_low child_game_number
1 1 63 33
2 30 36 33
3 19 50 34
4 14 52 34
5 23 41 35
6 10 56 35
7 27 38 36
8 7 61 36
9 4 67 37
@bdilday
bdilday / generatePitchingMarcels.py
Created March 13, 2017 01:56 — forked from JeffSackmann/generatePitchingMarcels.py
Generate a full season's worth of pitching Marcel projections from past years' stats
## Generate a full season's worth of pitching Marcel projections from past years' stats
from createTuple import createTuple ## gist: 778481
from writeMatrixCSV import writeMatrixCSV ## gist: 778484
def makePitTable(r):
for stat in ['AB', 'H', 'D', 'T', 'HR', 'SO', 'BB', 'SF', 'HP', 'CI', 'IPouts', 'R']:
if stat in r: pass
else: r[stat] = 0
ab = 0.9*r['IPouts'] + r['H']
@bdilday
bdilday / generateBattingMarcels.py
Created March 13, 2017 01:56 — forked from JeffSackmann/generateBattingMarcels.py
Generate a full season's worth of batting Marcel projections from past years' stats
## Generate a full season's worth of batting Marcel projections from past years' stats
from createTuple import createTuple ## gist: 778481
from writeMatrixCSV import writeMatrixCSV ## gist: 778484
def makeBatTable(r):
for stat in ['AB', 'H', 'D', 'T', 'HR', 'SO', 'BB', 'SF', 'HP', 'CI']:
if stat in r: pass
else: r[stat] = 0
if r['AB'] == 0:
@bdilday
bdilday / results2017.csv
Last active March 27, 2017 00:40
2017KaggleMarchMadness_Results
Id team1 team2 result
2017_1112_1315 Arizona North Dakota 1
2017_1116_1371 Arkansas Seton Hall 1
2017_1124_1308 Baylor New Mexico St 1
2017_1137_1452 Bucknell West Virginia 0
2017_1139_1457 Butler Winthrop 1
2017_1153_1448 Cincinnati Kansas St 1
2017_1166_1348 Creighton Rhode Island 0
2017_1173_1455 Dayton Wichita St 0
2017_1181_1407 Duke Troy 1
@bdilday
bdilday / d3.sankey.js
Last active May 2, 2017 01:19 — forked from emeeks/d3.sankey.js
Sankey Particles III - d3v4
d3.sankey = function() {
var sankey = {},
nodeWidth = 24,
nodePadding = 8,
size = [1, 1],
nodes = [],
links = [];
sankey.nodeWidth = function(_) {
if (!arguments.length) return nodeWidth;
@bdilday
bdilday / retrosheet_events_2016.csv
Last active May 28, 2017 20:56
sample retrosheet data
We can make this file beautiful and searchable if this error is corrected: Unclosed quoted field in line 1.
"GAME_ID","YEAR_ID","AWAY_TEAM_ID","INN_CT","BAT_HOME_ID","OUTS_CT","BALLS_CT","STRIKES_CT","PITCH_SEQ_TX","AWAY_SCORE_CT","HOME_SCORE_CT","BAT_ID","BAT_HAND_CD","RESP_BAT_ID","BAT_ON_DECK_ID","BAT_IN_HOLD_ID","RESP_BAT_HAND_CD","PIT_ID","PIT_HAND_CD","RESP_PIT_ID","RESP_PIT_HAND_CD","POS2_FLD_ID","POS3_FLD_ID","POS4_FLD_ID","POS5_FLD_ID","POS6_FLD_ID","POS7_FLD_ID","POS8_FLD_ID","POS9_FLD_ID","BASE1_RUN_ID","BASE2_RUN_ID","BASE3_RUN_ID","EVENT_TX","LEADOFF_FL","PH_FL","BAT_FLD_CD","BAT_LINEUP_ID","EVENT_CD","BAT_EVENT_FL","AB_FL","H_CD","SH_FL","SF_FL","EVENT_OUTS_CT","DP_FL","TP_FL","RBI_CT","WP_FL","PB_FL","FLD_CD","BATTEDBALL_CD","BUNT_FL","FOUL_FL","BATTEDBALL_LOC_TX","ERR_CT","ERR1_FLD_CD","ERR1_CD","ERR2_FLD_CD","ERR2_CD","ERR3_FLD_CD","ERR3_CD","BAT_DEST_ID","RUN1_DEST_ID","RUN2_DEST_ID","RUN3_DEST_ID","BAT_PLAY_TX","RUN1_PLAY_TX","RUN2_PLAY_TX","RUN3_PLAY_TX","RUN1_SB_FL","RUN2_SB_FL","RUN3_SB_FL","RUN1_CS_FL","RUN2_CS_FL","RUN3_CS_FL","RUN1_PK_FL","RUN2_PK_FL","RUN3_PK_FL","RUN1_RESP_PIT_ID","RUN2_R
@bdilday
bdilday / trajectory_calculator1.R
Last active February 20, 2018 03:54
trajectory calculator
trajectory_pars <- list(
# constants
mass = 5.125, # oz,
circumference = 9.125, # in
beta = 1.217e-4, # 1 / meter
cd0 = 0.3008,
cdspin = 0.0292,
cl0 = 0.583,
cl1 = 2.333,
@bdilday
bdilday / top10warfranch.R
Last active April 15, 2018 13:55
R graph - top10 war by franchise
library(readr)
library(ggplot2)
library(dplyr)
library(Lahman)
# Player-season batting WAR table published by Baseball-Reference.
br_war <- read_csv(
  "https://www.baseball-reference.com/data/war_daily_bat.txt"
)

# Lahman team table; supplies the team name and franchise for each
# (teamIDBR, yearID) pair.
m <- Lahman::Teams

# Total WAR per (franchID, name, player_ID). WAR is coerced with
# as.numeric() and values that come out NA are dropped from the sum.
# summarise() peels off only the last grouping variable, so `ndf` is
# still grouped by (franchID, name); the next step relies on that.
ndf <- br_war %>%
  merge(
    m %>% select(teamIDBR, yearID, name, franchID),
    by.x = c("team_ID", "year_ID"),
    by.y = c("teamIDBR", "yearID")
  ) %>%
  group_by(franchID, name, player_ID) %>%
  summarise(w = sum(as.numeric(WAR), na.rm = TRUE))

# One row per franchise: `m` ends up as the largest number of player rows
# found under any single team name of that franchise.
xx <- ndf %>%
  arrange(franchID, desc(w)) %>%
  # row_number() restarts inside each (franchID, name) group
  mutate(i = row_number()) %>%
  group_by(franchID, name) %>%
  summarise(m = max(i)) %>%
  arrange(franchID, desc(m)) %>%
  mutate(ir = row_number()) %>%
  group_by(franchID) %>%
  # m2 is computed from the per-name counts before `m` is overwritten
  mutate(m2 = max(m), m = sum(m)) %>%
  filter(ir == 1) %>%
  select(franchID, m = m2)

# Bar chart of the ten highest-WAR rows per franchise, one facet per
# franchise, restricted to franchises whose `m` is at least 110.
p <- ndf %>%
  merge(xx, by = "franchID") %>%
  group_by(franchID) %>%
  arrange(desc(w)) %>%
  mutate(i = row_number()) %>%
  filter(i <= 10) %>%
  filter(m >= 110) %>%
  ggplot(aes(x = i, y = w)) +
  geom_bar(stat = "identity") +
  facet_wrap(~franchID) +
  theme_minimal() +
  labs(x = "franchise rank", y = "career WAR")
@bdilday
bdilday / scrape_statcast_expected_stats.R
Last active May 17, 2018 01:27
scrape statcast expected stats
library(dplyr)
library(ggplot2)
library(rvest)
library(jsonlite)
scrape_statcast_expected_stats = function(year=2018, min_pa=25) {
url = sprintf("https://baseballsavant.mlb.com/expected_statistics?type=batter&year=%s&position=&team=&min=%d", year, min_pa)
h = xml2::read_html(url)
s = html_nodes(h, "script")[[10]]