Last active
August 29, 2015 13:56
-
-
Save bayesball/9220828 to your computer and use it in GitHub Desktop.
Trajectories of Ryan Howard and 10 similar hitters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Comparing Ryan Howard's with 10 similar players | |
# Richie Sexson, Cecil Fiedler, Mo Vaughn, Mark McGwire, Norm Cash | |
# Jay Buhner, Willie Stargel, Jason Giambi, Frank Howard, David Justice | |
# uses packages Lahman, dplyr, ggplot2 | |
# setup work and function to plot trajectory | |
# add argument plot=TRUE or FALSE | |
# plot=FALSE outputs the data frame with the rate data | |
library(Lahman) | |
library(dplyr) | |
# create new data frame Batting.new by | |
Batting.new <- summarise(group_by(Batting, playerID, yearID), | |
AB = sum(AB), | |
H = sum(H), | |
X2B = sum(X2B), | |
X3B = sum(X3B), | |
HR = sum(HR), | |
SB = sum(SB), | |
BB = sum(BB), | |
SO = sum(SO), | |
HBP = sum(HBP), | |
SF = sum(SF), | |
SH = sum(SH)) | |
myrecode <- function(data, var){ | |
data[, var] <- ifelse(is.na(data[, var]), 0, data[, var]) | |
data | |
} | |
Batting.new <- myrecode(Batting.new, "SF") | |
Batting.new <- myrecode(Batting.new, "SH") | |
# define plate appearance variable | |
Batting.new$PA <- with(Batting.new, AB + BB + HBP + SF + SH) | |
# add age variable | |
Master$birthyear <- with(Master, | |
ifelse(birthMonth >= 7, birthYear + 1, birthYear)) | |
Batting.new <- merge(Batting.new, | |
Master[, c("playerID", "nameFirst", "nameLast", "birthyear")], | |
by="playerID") | |
Batting.new$Age <- with(Batting.new, yearID - birthyear) | |
# add first and last years | |
library(dplyr) | |
C.Years <- summarise(group_by(Batting, playerID), | |
fYear=min(yearID), | |
lYear=max(yearID)) | |
Batting.new <- merge(Batting.new, C.Years, by="playerID") | |
# function to plot trajectory | |
plot.trajectory <- function(name, stat="H", denom="AB", | |
num=1, plot=TRUE){ | |
require(ggplot2) | |
firstlast <- unlist(strsplit(name," ")) | |
playerids <- unique(subset(Batting.new, | |
nameFirst==firstlast[1] & | |
nameLast==firstlast[2])$playerID) | |
d <- subset(Batting.new, playerID==playerids[num]) | |
d$Rate <- d[, stat] / d[, denom] | |
if(plot==TRUE){ | |
print(ggplot(d, aes(Age, Rate)) + | |
geom_point(size=5, color="red") + | |
geom_smooth(method="loess", size=3) + | |
theme(axis.text = element_text(size = rel(2))) + | |
theme(axis.title = element_text(size = rel(2))) + | |
theme(plot.title = element_text(size = rel(2))) + | |
labs(title = paste(stat,"/",denom, | |
"Career Trajectory of", name, | |
d$fYear, "to", | |
d$lYear)))} else { | |
data.frame(Player=name, Age=d$Age, Rate=d$Rate) | |
} | |
} | |
# compare trajectories | |
d <- NULL | |
names <- c("Ryan Howard", "Richie Sexson", | |
"Cecil Fielder", "Mo Vaughn", "Mark McGwire", | |
"Norm Cash", "Jay Buhner", "Willie Stargell", | |
"Jason Giambi", "Frank Howard", "David Justice") | |
for (j in 1:11) | |
d <- rbind(d, plot.trajectory(names[j], "HR", plot=FALSE)) | |
d <- rbind(d, data.frame(Player="Ryan Howard", | |
Age=33, | |
Rate=11/286)) | |
library(ggplot2) | |
print(ggplot(d, aes(Age, Rate)) + | |
geom_point(size=3, color="red") + | |
geom_smooth(method="loess", size=1.5) + | |
facet_wrap(~ Player, ncol=4) + | |
ylab("HOME RUN RATE") + xlab("AGE") + | |
theme(strip.text = element_text(size = rel(2))) | |
) | |
# quadratic fits | |
# ggplot(d, aes(Age, Rate)) + | |
# geom_point(size=3, color="red") + | |
# geom_smooth(method="lm", formula=y~x+I(x^2), size=1.5) + | |
# facet_wrap(~ Player, ncol=4) + | |
# ylab("HOME RUN RATE") + xlab("AGE") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment