Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active August 29, 2015 13:56
Show Gist options
  • Save bayesball/9220828 to your computer and use it in GitHub Desktop.
Save bayesball/9220828 to your computer and use it in GitHub Desktop.
Trajectories of Ryan Howard and 10 similar hitters
# Comparing Ryan Howard's with 10 similar players
# Richie Sexson, Cecil Fiedler, Mo Vaughn, Mark McGwire, Norm Cash
# Jay Buhner, Willie Stargel, Jason Giambi, Frank Howard, David Justice
# uses packages Lahman, dplyr, ggplot2
# setup work and function to plot trajectory
# add argument plot=TRUE or FALSE
# plot=FALSE outputs the data frame with the rate data
library(Lahman)
library(dplyr)
# create new data frame Batting.new by
Batting.new <- summarise(group_by(Batting, playerID, yearID),
AB = sum(AB),
H = sum(H),
X2B = sum(X2B),
X3B = sum(X3B),
HR = sum(HR),
SB = sum(SB),
BB = sum(BB),
SO = sum(SO),
HBP = sum(HBP),
SF = sum(SF),
SH = sum(SH))
myrecode <- function(data, var){
data[, var] <- ifelse(is.na(data[, var]), 0, data[, var])
data
}
Batting.new <- myrecode(Batting.new, "SF")
Batting.new <- myrecode(Batting.new, "SH")
# define plate appearance variable
Batting.new$PA <- with(Batting.new, AB + BB + HBP + SF + SH)
# add age variable
Master$birthyear <- with(Master,
ifelse(birthMonth >= 7, birthYear + 1, birthYear))
Batting.new <- merge(Batting.new,
Master[, c("playerID", "nameFirst", "nameLast", "birthyear")],
by="playerID")
Batting.new$Age <- with(Batting.new, yearID - birthyear)
# add first and last years
library(dplyr)
C.Years <- summarise(group_by(Batting, playerID),
fYear=min(yearID),
lYear=max(yearID))
Batting.new <- merge(Batting.new, C.Years, by="playerID")
# function to plot trajectory
plot.trajectory <- function(name, stat="H", denom="AB",
num=1, plot=TRUE){
require(ggplot2)
firstlast <- unlist(strsplit(name," "))
playerids <- unique(subset(Batting.new,
nameFirst==firstlast[1] &
nameLast==firstlast[2])$playerID)
d <- subset(Batting.new, playerID==playerids[num])
d$Rate <- d[, stat] / d[, denom]
if(plot==TRUE){
print(ggplot(d, aes(Age, Rate)) +
geom_point(size=5, color="red") +
geom_smooth(method="loess", size=3) +
theme(axis.text = element_text(size = rel(2))) +
theme(axis.title = element_text(size = rel(2))) +
theme(plot.title = element_text(size = rel(2))) +
labs(title = paste(stat,"/",denom,
"Career Trajectory of", name,
d$fYear, "to",
d$lYear)))} else {
data.frame(Player=name, Age=d$Age, Rate=d$Rate)
}
}
# compare trajectories
d <- NULL
names <- c("Ryan Howard", "Richie Sexson",
"Cecil Fielder", "Mo Vaughn", "Mark McGwire",
"Norm Cash", "Jay Buhner", "Willie Stargell",
"Jason Giambi", "Frank Howard", "David Justice")
for (j in 1:11)
d <- rbind(d, plot.trajectory(names[j], "HR", plot=FALSE))
d <- rbind(d, data.frame(Player="Ryan Howard",
Age=33,
Rate=11/286))
library(ggplot2)
print(ggplot(d, aes(Age, Rate)) +
geom_point(size=3, color="red") +
geom_smooth(method="loess", size=1.5) +
facet_wrap(~ Player, ncol=4) +
ylab("HOME RUN RATE") + xlab("AGE") +
theme(strip.text = element_text(size = rel(2)))
)
# quadratic fits
# ggplot(d, aes(Age, Rate)) +
# geom_point(size=3, color="red") +
# geom_smooth(method="lm", formula=y~x+I(x^2), size=1.5) +
# facet_wrap(~ Player, ncol=4) +
# ylab("HOME RUN RATE") + xlab("AGE")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment