-
-
Save fototo/157bf06e9e2b62041bc3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###############################################
# Working with the complete set of 2015 play-by-play data,
# collected using the getData function in the openWAR
# package (which retrieves MLBAM GameDay files).
# The data is currently saved as an Rdata file.
###############################################
# Restore the saved workspace objects; the code below expects this to
# provide the play-by-play data frame d2015.
load("alldata2015.Rdata")

# computes the run values of all plate appearances
library(dplyr)

# Logical base-occupancy flags: a base counts as occupied when the
# corresponding start*/end* column is not NA.
#   R1B/R2B/R3B - occupancy at the start of the plate appearance
#   E1B/E2B/E3B - occupancy at the end of the plate appearance
d2015 <- d2015 %>%
  mutate(
    R1B = !is.na(start1B),
    R2B = !is.na(start2B),
    R3B = !is.na(start3B),
    E1B = !is.na(end1B),
    E2B = !is.na(end2B),
    E3B = !is.na(end3B)
  )
# Run expectancy table: the mean of runsFuture for every combination of
# outs and base occupancy at the start of a plate appearance. The outer
# parentheses print the table in addition to assigning it.
(runs_expectancy <- d2015 %>%
  group_by(startOuts, R1B, R2B, R3B) %>%
  summarize(Runs = mean(runsFuture)) %>%
  ungroup())

# Attach the expectancy of the starting state; this adds the column Runs.
d2015 <- inner_join(d2015, runs_expectancy,
                    by = c("startOuts", "R1B", "R2B", "R3B"))

# Attach the expectancy of the ending state. Both tables now carry a Runs
# column, so the join renames them Runs.x (start state) and Runs.y (end
# state).
# A left join is used on purpose: runs_expectancy is keyed on startOuts, so
# a play whose endOuts value never occurs as a startOuts value has no
# matching row, and an inner join would silently drop that play from d2015.
d2015 <- left_join(d2015, runs_expectancy,
                   by = c("endOuts" = "startOuts",
                          "E1B" = "R1B",
                          "E2B" = "R2B",
                          "E3B" = "R3B"))

# A play that records the third out leaves no further scoring chance in the
# half-inning, so its end-state expectancy is 0 rather than missing.
# NOTE(review): assumes endOuts is coded as 3 on those plays - confirm
# against the data. Any other unmatched end state is left as NA.
# Run value of a play = change in run expectancy + runs scored on the play.
d2015 <- mutate(d2015,
                Runs.y = ifelse(is.na(Runs.y) & endOuts == 3, 0, Runs.y),
                Runs = Runs.y - Runs.x + runsOnPlay)
###############################
# ballpark adjustment
###############################

# Per-batter summary: mean run value (R) and number of plate appearances (N)
S <- d2015 %>%
  group_by(batterName) %>%
  summarize(R = mean(Runs), N = n())

# Batting team for each row: the away team bats in the "top" half,
# otherwise the home team is used.
d2015 <- d2015 %>%
  mutate(
    bat_team = ifelse(half == "top",
                      as.character(away_team),
                      as.character(home_team))
  )

# One team per batter: the bat_team value that occurs most often
TeamData <- d2015 %>%
  group_by(batterName) %>%
  summarize(Team = names(sort(table(bat_team), decreasing = TRUE))[1])

S1 <- S %>%
  inner_join(TeamData, by = "batterName")

# Scatterplot of mean run value against PA for every batter, with a
# smoother; batters whose Team is "col" (Rockies) are overplotted in red.
library(ggplot2)
ggplot(S1, aes(N, R)) +
  geom_point(alpha = .2) +
  ylim(c(-.15, .15)) +
  geom_smooth() +
  geom_point(data = S1 %>% filter(Team == "col"),
             aes(N, R),
             color = "red") +
  ggtitle("Mean Run Values for all Players in 2015 Season\nRockies Players in Red")
###### team regression: Runs on bat_team ids, keeping the residuals
# The "0 +" removes the intercept, so the model fits one coefficient per
# bat_team level and each residual is Runs minus the fitted value for
# that row's team.
# NOTE(review): the factor is the batting team (bat_team), which this
# section uses as its ballpark adjustment.
fit <- lm(Runs ~ 0 + bat_team, data = d2015)
d2015$Residual <- residuals(fit)

# Per-batter mean residual and number of plate appearances
R <- d2015 %>%
  group_by(batterName) %>%
  summarize(Residual = mean(Residual), N = n())

R1 <- R %>%
  inner_join(TeamData, by = "batterName")

# Scatterplot of mean residual against PA for every batter, with a
# smoother; batters whose Team is "col" (Rockies) are overplotted in red.
ggplot(R1, aes(N, Residual)) +
  geom_point(alpha = .2) +
  ylim(c(-.15, .15)) +
  geom_smooth() +
  geom_point(data = R1 %>% filter(Team == "col"),
             aes(N, Residual),
             color = "red") +
  ggtitle("Mean Residual Values Adjusted for Ballpark\nRockies Players in Red")
# Pitcher-arm and batter-side effects.
# Mean run value and PA count for every batter within each
# (stand, throws) combination.
A <- d2015 %>%
  group_by(batterName, stand, throws) %>%
  summarize(Mean = mean(Runs), N = n())

# Readable facet label built from the stand and throws values
A <- A %>%
  mutate(Platoon = paste(stand, "hitter against", throws, "pitcher"))

# Overall mean per platoon situation: the batter means weighted by PA
A.mean <- A %>%
  group_by(Platoon) %>%
  summarize(Mean = sum(Mean * N) / sum(N))

# One panel per platoon situation: batter means against PA, a reference
# line at zero, and the PA-weighted situation mean drawn in red.
ggplot(A, aes(N, Mean)) +
  geom_point() +
  facet_wrap(~ Platoon, ncol = 2) +
  ylim(c(-.15, .15)) +
  geom_hline(yintercept = 0) +
  geom_hline(aes(yintercept = Mean), data = A.mean, color = "red") +
  ggtitle("Mean Run Values for all Players in 2015 Season\nPlatoon Effects")
# Platoon factor: one level per observed (stand, throws) combination
d2015 <- d2015 %>%
  mutate(platoon = factor(paste(stand, throws)))

# No-intercept regression of Runs on the platoon factor: one coefficient
# per level, so each residual is Runs minus the fitted value for that
# row's platoon situation.
fit2 <- lm(Runs ~ 0 + platoon, data = d2015)
d2015$Residual2 <- residuals(fit2)

# Mean residual and PA count for every batter within each
# (stand, throws) combination.
A1 <- d2015 %>%
  group_by(batterName, stand, throws) %>%
  summarize(Mean = mean(Residual2), N = n())

# Readable facet label built from the stand and throws values
A1 <- A1 %>%
  mutate(Platoon = paste(stand, "hitter against", throws, "pitcher"))

# Overall mean residual per platoon situation, weighted by PA
A1.mean <- A1 %>%
  group_by(Platoon) %>%
  summarize(Mean = sum(Mean * N) / sum(N))

# One panel per platoon situation: batter mean residuals against PA, a
# reference line at zero, and the PA-weighted situation mean drawn in red.
ggplot(A1, aes(N, Mean)) +
  geom_point() +
  facet_wrap(~ Platoon, ncol = 2) +
  ylim(c(-.15, .15)) +
  geom_hline(yintercept = 0) +
  geom_hline(aes(yintercept = Mean), data = A1.mean, color = "red") +
  ggtitle("Mean Residual Values for all Players in 2015 Season\nPlatoon Effects")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment