# Source: GitHub gist by @dggoldst (dggoldst/03f9102f9333a450375a).
# Last active August 29, 2015 14:08
library(Lahman)
library(plyr)
library(dplyr)
library(ggplot2)
# Birth year per player; used below to turn a season (yearID) into an age.
# NOTE(review): newer Lahman releases may expose this table as `People`
# rather than `Master` -- confirm against the installed package version.
master <- Lahman::Master %>%
  select(playerID, birthYear)

# Batting average (BA) for every row returned by battingStats().
bstats <- battingStats() %>%
  select(playerID, yearID, BA)

# Most recent season present in the data. This used to be hard-coded as 2013,
# which silently goes stale once the installed data covers later seasons.
final_season <- max(bstats$yearID, na.rm = TRUE)

# One row per batting record with the player's age and a "yearsLeft" rank.
batting <- left_join(bstats, master, by = "playerID") %>%
  mutate(age = yearID - birthYear) %>%
  arrange(playerID, age) %>%
  filter(!is.na(BA) & !is.na(age)) %>%
  group_by(playerID) %>%
  # Keep only players that still have at least two usable rows.
  filter(n() >= 2) %>%
  # yearsLeft == 0 marks the row(s) at a player's highest recorded age; the
  # summaries further down only distinguish yearsLeft == 0 from yearsLeft > 0.
  mutate(yearsLeft = min_rank(desc(age)) - 1) %>%
  # Drop "last" rows that fall in the final season of the data (presumably
  # because such a player may still be active, so it need not be a true last
  # year -- confirm intent).
  filter(!(yearsLeft == 0 & yearID == final_season)) %>%
  # Downstream code regroups explicitly, so do not leak the per-player groups.
  ungroup()
### Split by people in their last year of play or not
# For every (age, LastYear) cell: mean BA, standard error of that mean, and
# the number of rows contributing to it. Restricted to ages 20 through 42.
plot_dataA <- batting %>%
  group_by(age, LastYear = !(yearsLeft > 0)) %>%
  summarise(
    mu = mean(BA),
    se = sqrt(var(BA) / n()),
    obs = n()
  ) %>%
  # summarise() only peels off the innermost grouping level; drop the rest so
  # the result is a plain table before it is combined with other tables.
  ungroup() %>%
  filter(age >= 20 & age <= 42) %>%
  mutate(
    # The standard error is deliberately zeroed for the last-year rows, which
    # collapses mu - se / mu + se onto mu for that series in the plots.
    se = ifelse(LastYear, 0, se),
    CareerYear = ifelse(LastYear, "Last", "Not Last")
  )
### Compute regardless of last year or not
# Same summary as plot_dataA but pooled over all rows of a given age.
# LastYear is filled with NA so the columns line up with plot_dataA.
plot_dataB <- batting %>%
  group_by(age) %>%
  summarise(
    LastYear = NA,
    mu = mean(BA),
    se = sqrt(var(BA) / n()),
    obs = n()
  ) %>%
  filter(age >= 20 & age <= 42) %>%
  mutate(CareerYear = "Combined")

# Stack the split and pooled summaries. bind_rows() matches columns by name;
# plot_dataA is ungrouped first so no stale grouping metadata is carried into
# the combined table (base rbind() is not aware of dplyr groups).
plot_data <- bind_rows(ungroup(plot_dataA), plot_dataB) %>%
  mutate(
    # Fixed level order controls the legend / colour order in the plots.
    CareerYear = factor(
      CareerYear,
      levels = c("Not Last", "Combined", "Last")
    )
  )
#Hi Michael Schulte-Mecklenbeck!
# Mean batting average by age, one line per CareerYear series. Error bars span
# mu +/- se and point area is proportional to the number of observations.
p <- ggplot(
  plot_data,
  aes(
    x = age, y = mu, ymin = mu - se, ymax = mu + se,
    group = CareerYear, color = CareerYear
  )
) +
  geom_line() +
  geom_pointrange() +
  geom_point(aes(size = obs)) +
  scale_size_area() +
  labs(x = "Age", y = "Mean Batting Average") +
  theme(legend.position = "bottom")
p
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(filename = "BA_by_Age.png", plot = p, width = 4, height = 6)
# Smoothed version of the plot: a loess curve with a 90% confidence band per
# CareerYear series. The smoother is fitted to the per-age means in plot_data
# (the rows plotted as points), not to the underlying batting rows.
p <- ggplot(
  plot_data,
  aes(
    x = age, y = mu, ymin = mu - se, ymax = mu + se,
    group = CareerYear, color = CareerYear
  )
) +
  stat_smooth(span = 10, level = .9, method = "loess") +
  geom_point(aes(size = obs)) +
  scale_size_area() +
  labs(x = "Age", y = "Mean Batting Average") +
  theme(legend.position = "bottom")
p
# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(filename = "BA_by_Age_smooth.png", plot = p, width = 4, height = 6)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment