Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active August 29, 2015 13:56
Show Gist options
  • Save bayesball/9043429 to your computer and use it in GitHub Desktop.
Save bayesball/9043429 to your computer and use it in GitHub Desktop.
R script and function to plot the career trajectory for a MLB baseball hitter
#######################################################
# trajectory.2014.R
# script and function to plot a career trajectory for a batter
# in major league baseball
# requires installation of packages
# Lahman, dplyr, and ggplot2
#
# once this script is sourced, then graph trajectory of
# Alex Rodriguez's home run rates by typing
# plot.trajectory("Alex Rodriguez", "HR")
#######################################################
# setup work
library(Lahman)
library(dplyr)
# create new data frame Batting.new by
# collapsing over stint variable - took 0.34 seconds
# (one row per playerID / yearID, each counting stat summed over the
# rows of Batting that share that player and year)
Batting.new <- summarise(
  group_by(Batting, playerID, yearID),
  AB = sum(AB),
  H = sum(H),
  X2B = sum(X2B),
  X3B = sum(X3B),
  HR = sum(HR),
  SB = sum(SB),
  BB = sum(BB),
  SO = sum(SO),
  HBP = sum(HBP),
  SF = sum(SF),
  SH = sum(SH)
)
# summarise() drops only the last grouping variable, so the result would
# otherwise remain grouped by playerID.  Remove the grouping and convert to
# a plain data frame, since the remaining steps rely on base-R data frame
# indexing (e.g. data[, var] returning a vector).
Batting.new <- as.data.frame(ungroup(Batting.new))
# Replace the missing values in one column of a data frame by zero.
#   data - a data frame (a tibble also works)
#   var  - name of the column to recode, as a single character string
# Returns data with every NA in column var replaced by 0; all other
# columns are left untouched.  Stops if var is not a column of data.
myrecode <- function(data, var){
  if (!(var %in% names(data))) {
    stop("column '", var, "' not found in data", call. = FALSE)
  }
  # [[ ]] always extracts a plain vector, whereas data[, var] returns a
  # one-column tibble when data is a tibble, which ifelse() cannot handle
  column <- data[[var]]
  column[is.na(column)] <- 0
  data[[var]] <- column
  data
}
# missing SF and SH counts are treated as zero
Batting.new <- myrecode(myrecode(Batting.new, "SF"), "SH")
# define plate appearance variable as AB + BB + HBP + SF + SH
# NOTE(review): only SF and SH are recoded above, so PA is NA for any
# row where AB, BB or HBP is missing - confirm that this is intended
Batting.new[["PA"]] <- Batting.new$AB + Batting.new$BB + Batting.new$HBP +
  Batting.new$SF + Batting.new$SH
# add age variable
# A player born in July or later is treated as born in the following
# year, so Age is the age the player has on June 30 of the season.
Batting.new <- local({
  # newer Lahman releases name the player table People instead of Master;
  # use whichever one is available
  players <- if (exists("People")) People else Master
  # work on a local copy so that no modified copy of the package data set
  # is left behind in the global environment
  players$birthyear <- with(players,
                            ifelse(birthMonth >= 7, birthYear + 1, birthYear))
  merge(Batting.new,
        players[, c("playerID", "nameFirst", "nameLast", "birthyear")],
        by = "playerID")
})
Batting.new$Age <- with(Batting.new, yearID - birthyear)
# add first and last years
# (fYear / lYear = smallest / largest yearID among a player's Batting rows)
library(dplyr)
C.Years <- Batting %>%
  group_by(playerID) %>%
  summarise(fYear = min(yearID), lYear = max(yearID))
# attach the two career-span columns to every season row of that player
Batting.new <- merge(x = Batting.new, y = C.Years, by = "playerID")
# function to plot trajectory
#
# Draws the season-by-season rate stat / denom against Age for one batter
# as red points with a loess smooth, and prints the plot.
#
# Arguments:
#   name  - player name as "First Last"; compared with nameFirst and
#           nameLast pasted together with a single space
#   stat  - name of the numerator column of Batting.new (default "H")
#   denom - name of the denominator column of Batting.new (default "AB")
#   num   - which player to use when several playerIDs share the name,
#           counted in the order the playerIDs appear in Batting.new
#           (default 1)
#
# Returns the value of print() on the ggplot object.
# Uses the data frame Batting.new built earlier in this script.
plot.trajectory <- function(name, stat="H", denom="AB", num=1){
  # library() stops with an error if ggplot2 is not installed, whereas
  # require() would only return FALSE and let the function fail later
  library(ggplot2)

  # match on the full name so that first or last names consisting of more
  # than one word are found as well
  fullnames <- paste(Batting.new$nameFirst, Batting.new$nameLast)
  playerids <- unique(Batting.new$playerID[fullnames == name])
  if (length(playerids) == 0) {
    stop("no player named '", name, "' found in Batting.new", call. = FALSE)
  }
  if (num < 1 || num > length(playerids)) {
    stop("num must be between 1 and ", length(playerids),
         " for player name '", name, "'", call. = FALSE)
  }
  unknown <- setdiff(c(stat, denom), names(Batting.new))
  if (length(unknown) > 0) {
    stop("unknown column(s) in Batting.new: ",
         paste(unknown, collapse = ", "), call. = FALSE)
  }

  # which() drops any NA comparisons instead of producing all-NA rows
  d <- Batting.new[which(Batting.new$playerID == playerids[num]), ]
  d$Rate <- d[[stat]] / d[[denom]]

  # ggplot2 3.4.0 renamed the line thickness argument from size to linewidth
  smooth.layer <- if (utils::packageVersion("ggplot2") >= "3.4.0") {
    geom_smooth(method = "loess", linewidth = 3)
  } else {
    geom_smooth(method = "loess", size = 3)
  }

  # fYear and lYear are repeated on every row of d; take the first value so
  # that the title is a single string rather than one string per season
  plot.title <- paste(stat, "/", denom,
                      "Career Trajectory of", name,
                      d$fYear[1], "to",
                      d$lYear[1])

  p <- ggplot(d, aes(Age, Rate)) +
    geom_point(size = 5, color = "red") +
    smooth.layer +
    theme(axis.text = element_text(size = rel(2)),
          axis.title = element_text(size = rel(2)),
          plot.title = element_text(size = rel(2))) +
    labs(title = plot.title)
  print(p)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment