# Created
# June 5, 2013 04:21
# -
# -
# Save MonkmanMH/5711584 to your computer and use it in GitHub Desktop.
# MLB runs per game (Lahman database)
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# load the package and data set "Teams"
# Install Lahman only when it is missing, so re-running the script does not
# re-download and re-install the package every time.
if (!requireNamespace("Lahman", quietly = TRUE)) {
  install.packages("Lahman")
}
library("Lahman")
data(Teams)
# | |
# | |
# CREATE LEAGUE SUMMARY TABLES
# ============================
#
# select a sub-set of teams from 1901 [the establishment of the American
# League] forward to 2012. The upper bound is explicit so that newer releases
# of the Lahman data do not extend the series past the range stated in the
# plot titles.
Teams_sub <- as.data.frame(subset(Teams, yearID > 1900 & yearID <= 2012))
#
# calculate each team's average runs and runs allowed per game
Teams_sub$RPG <- Teams_sub$R / Teams_sub$G
Teams_sub$RAPG <- Teams_sub$RA / Teams_sub$G
#
# create new data frame with season totals (runs, runs allowed, games)
# for each league
LG_RPG <- aggregate(
  cbind(R, RA, G) ~ yearID + lgID,
  data = Teams_sub,
  FUN = sum
)
# calculate league + season runs and runs allowed per game
LG_RPG$LG_RPG <- LG_RPG$R / LG_RPG$G
LG_RPG$LG_RAPG <- LG_RPG$RA / LG_RPG$G
#
# read the league totals into separate AL and NL tables
# (Teams_sub is already limited to the seasons of interest)
ALseason <- subset(LG_RPG, lgID == "AL")
NLseason <- subset(LG_RPG, lgID == "NL")
# order each table by season so the lines drawn from them run chronologically
ALseason <- ALseason[order(ALseason$yearID), ]
NLseason <- NLseason[order(NLseason$yearID), ]
# | |
# +++++++++++++++++++++++++++++++++++++++++++++++++++ | |
# | |
# RUNS SCORED PER GAME | |
# ==================== | |
# | |
# web references: | |
# http://princeofslides.blogspot.ca/2011/05/sab-r-metrics-basics-of-loess.html | |
# http://research.stowers-institute.org/efg/R/Statistics/loess.htm | |
# | |
# start with American League
# The loess models regress league runs per game on season. The data frame is
# supplied through `data =` so the model stores plain variable names and can
# also be used with predict(..., newdata = ...).
#
# create new object ALRunScore.LO for loess model (default span)
ALRunScore.LO <- loess(LG_RPG ~ yearID, data = ALseason)
ALRunScore.LO.predict <- predict(ALRunScore.LO)
#
# create new objects ALRunScore.LO.XX for loess models with "span" control
ALRunScore.LO.25 <- loess(LG_RPG ~ yearID, data = ALseason, span = 0.25)
ALRunScore.LO.25.predict <- predict(ALRunScore.LO.25)
#
ALRunScore.LO.5 <- loess(LG_RPG ~ yearID, data = ALseason, span = 0.5)
ALRunScore.LO.5.predict <- predict(ALRunScore.LO.5)
#
# plot the data (one point per season), then add the loess curves
ylim <- c(3, 6)
plot(LG_RPG ~ yearID,
  data = ALseason,
  ylim = ylim,
  main = "American League: runs per team per game, 1901-2012",
  xlab = "year", ylab = "runs per game"
)
# loess predicted value lines, one per span setting
lines(ALseason$yearID, ALRunScore.LO.predict,
  lty = "solid", col = "red", lwd = 2
)
lines(ALseason$yearID, ALRunScore.LO.25.predict,
  lty = "dashed", col = "blue", lwd = 2
)
lines(ALseason$yearID, ALRunScore.LO.5.predict,
  lty = "dotdash", col = "black", lwd = 2
)
# chart tidying
legend(1980, 3.5,
  legend = c("default", "span=0.25", "span=0.50"),
  lty = c("solid", "dashed", "dotdash"),
  col = c("red", "blue", "black"),
  lwd = c(2, 2, 2)
)
grid()
# | |
# NATIONAL LEAGUE
# The loess models regress league runs per game on season. The data frame is
# supplied through `data =` so the model stores plain variable names and can
# also be used with predict(..., newdata = ...).
#
# create new object NLRunScore.LO for loess model (default span)
NLRunScore.LO <- loess(LG_RPG ~ yearID, data = NLseason)
NLRunScore.LO.predict <- predict(NLRunScore.LO)
#
# objects with span control in loess model
NLRunScore.LO.25 <- loess(LG_RPG ~ yearID, data = NLseason, span = 0.25)
NLRunScore.LO.25.predict <- predict(NLRunScore.LO.25)
NLRunScore.LO.5 <- loess(LG_RPG ~ yearID, data = NLseason, span = 0.5)
NLRunScore.LO.5.predict <- predict(NLRunScore.LO.5)
#
# plot the data (one triangle per season), then add the loess curves
ylim <- c(3, 6)
plot(LG_RPG ~ yearID,
  data = NLseason,
  pch = 2, col = "black",
  ylim = ylim,
  main = "National League: runs per team per game, 1901-2012",
  xlab = "year", ylab = "runs per game"
)
# loess predicted value lines, one per span setting
lines(NLseason$yearID, NLRunScore.LO.predict,
  lty = "solid", col = "blue", lwd = 2
)
lines(NLseason$yearID, NLRunScore.LO.25.predict,
  lty = "dashed", col = "red", lwd = 2
)
lines(NLseason$yearID, NLRunScore.LO.5.predict,
  lty = "dotdash", col = "black", lwd = 2
)
# chart tidying
legend(1980, 3.5,
  legend = c("default", "span=0.25", "span=0.50"),
  lty = c("solid", "dashed", "dotdash"),
  col = c("blue", "red", "black"),
  lwd = c(2, 2, 2)
)
grid()
# | |
# | |
# MULTI-PLOT -- MERGING AL AND NL RESULTS
# plot the actual league value for each year, one line per league
ylim <- c(3, 6)
# start with AL line (this call also sets up the axes and title)
plot(ALseason$yearID, ALseason$LG_RPG,
  type = "l", lty = "solid", col = "red", lwd = 2,
  ylim = ylim,
  main = "Runs per team per game, 1901-2012",
  xlab = "year", ylab = "runs per game"
)
# add NL line
lines(NLseason$yearID, NLseason$LG_RPG, lty = "solid", col = "blue", lwd = 2)
# chart additions
grid()
legend(1900, 3.5,
  legend = c("AL", "NL"),
  lty = c("solid", "solid"),
  col = c("red", "blue"),
  lwd = c(2, 2)
)
# | |
# | |
# plot loess curves (span=0.25) for both leagues on one chart
ylim <- c(3, 6)
# start with AL line (this call also sets up the axes and title)
plot(ALseason$yearID, ALRunScore.LO.25.predict,
  type = "l", lty = "solid", col = "red", lwd = 2,
  ylim = ylim,
  main = "Runs per team per game, 1901-2012",
  xlab = "year", ylab = "runs per game"
)
# add NL line
lines(NLseason$yearID, NLRunScore.LO.25.predict,
  lty = "dashed", col = "blue", lwd = 2
)
# chart additions
legend(1900, 3.5,
  legend = c("AL (span=0.25)", "NL (span=0.25)"),
  lty = c("solid", "dashed"),
  col = c("red", "blue"),
  lwd = c(2, 2)
)
grid()
# | |
# | |
# plot loess curves (span=0.50) for both leagues on one chart
ylim <- c(3, 6)
# start with AL line (this call also sets up the axes and title)
plot(ALseason$yearID, ALRunScore.LO.5.predict,
  type = "l", lty = "solid", col = "red", lwd = 2,
  ylim = ylim,
  main = "Runs per team per game, 1901-2012",
  xlab = "year", ylab = "runs per game"
)
# add NL line
lines(NLseason$yearID, NLRunScore.LO.5.predict,
  lty = "dashed", col = "blue", lwd = 2
)
# chart additions
legend(1900, 3.5,
  legend = c("AL (span=0.50)", "NL (span=0.50)"),
  lty = c("solid", "dashed"),
  col = c("red", "blue"),
  lwd = c(2, 2)
)
grid()
# | |
# | |
# | |
# plot multiple loess curves (span=0.50 and 0.25)
# colour identifies the league (red = AL, blue = NL);
# line type identifies the span (solid = 0.50, dashed = 0.25)
ylim <- c(3, 6)
# start with AL span=0.50 line (this call also sets up the axes and title)
plot(ALseason$yearID, ALRunScore.LO.5.predict,
  type = "l", lty = "solid", col = "red", lwd = 2,
  ylim = ylim,
  main = "Runs per team per game, 1901-2012",
  xlab = "year", ylab = "runs per game"
)
# add NL span=0.50 line
lines(NLseason$yearID, NLRunScore.LO.5.predict,
  lty = "solid", col = "blue", lwd = 2
)
# add 0.25 lines
lines(ALseason$yearID, ALRunScore.LO.25.predict,
  lty = "dashed", col = "red", lwd = 2
)
lines(NLseason$yearID, NLRunScore.LO.25.predict,
  lty = "dashed", col = "blue", lwd = 2
)
# chart additions
legend(1900, 3.5,
  legend = c(
    "AL (span=0.50)", "NL (span=0.50)",
    "AL (span=0.25)", "NL (span=0.25)"
  ),
  lty = c("solid", "solid", "dashed", "dashed"),
  col = c("red", "blue", "red", "blue"),
  lwd = c(2, 2, 2, 2)
)
grid()
# | |
# # # # # # # # # # # # # # # # # # | |
# | |
# calculate the difference between the two leagues (AL minus NL)
# Pair every AL season with the NL row for the same year rather than relying
# on both tables having identical length and row order. An AL season with no
# NL counterpart yields NA instead of a silently recycled or shifted value.
# Both result vectors stay aligned with ALseason$yearID for plotting.
nl_row <- match(ALseason$yearID, NLseason$yearID)
# 1. absolute (actual league values for each year)
RunDiff <- ALseason$LG_RPG - NLseason$LG_RPG[nl_row]
# 2. LOESS span=0.25 (difference between the two smoothed trends)
RunDiffLO <- ALRunScore.LO.25.predict - NLRunScore.LO.25.predict[nl_row]
# | |
# plot the LOESS difference (AL trend minus NL trend) by year
ylim <- c(-1, 1)
plot(ALseason$yearID, RunDiffLO,
  type = "l", lty = "solid", col = "red", lwd = 2,
  ylim = ylim,
  main = "Run scoring trend: AL difference from NL, 1901-2012",
  xlab = "year", ylab = "runs per game"
)
# add line at zero (no difference between the leagues)
abline(h = 0, lty = "dotdash")
grid()
# | |
# plot each year difference as line, trend as line
ylim <- c(-1, 1.5)
# LOESS trend of the difference (thick red line; sets up axes and title)
plot(ALseason$yearID, RunDiffLO,
  type = "l", lty = "solid", col = "red", lwd = 3,
  ylim = ylim,
  main = "Run scoring trend: AL difference from NL, 1901-2012",
  xlab = "year", ylab = "runs per game"
)
# add RunDiff line (year-by-year difference, thin black line)
lines(ALseason$yearID, RunDiff, lty = "solid", col = "black", lwd = 1)
# add line at zero (no difference between the leagues)
abline(h = 0, lty = "dotdash")
grid()
# | |
# | |
# plot each year difference as bar, trend as line
ylim <- c(-1, 1.5)
# year-by-year difference drawn as vertical bars (type = "h")
plot(ALseason$yearID, RunDiff,
  type = "h", lty = "solid", col = "blue", lwd = 2,
  ylim = ylim,
  main = "Run scoring trend: AL difference from NL, 1901-2012",
  xlab = "year", ylab = "runs per game"
)
# add RunDiffLO line (LOESS trend of the difference)
lines(ALseason$yearID, RunDiffLO, lty = "solid", col = "black", lwd = 2)
# add line at zero (no difference between the leagues)
abline(h = 0, lty = "dotdash")
# chart additions
grid()
legend(1900, 1.5,
  legend = c(
    "AL difference from NL: absolute",
    "AL difference from NL, LOESS (span=0.25)"
  ),
  lty = c("solid", "solid"),
  col = c("blue", "black"),
  lwd = c(2, 2)
)
# | |
# |
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment