Created
February 17, 2013 06:45
-
-
Save MonkmanMH/4970480 to your computer and use it in GitHub Desktop.
MLB runs per game - league trends
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# MAJOR LEAGUE BASEBALL - RUNS PER GAME TREND
#
# discussion at
# 1. http://bayesball.blogspot.ca/2012/07/trends-in-al-run-scoring-using-r.html
# 2. http://bayesball.blogspot.ca/2012/07/trends-in-run-scoring-nl-edition-more-r.html
# 3. http://bayesball.blogspot.ca/2012/08/trends-in-run-scoring-comparing-leagues.html
#
# data source: Baseball Reference
# http://www.baseball-reference.com
# http://www.baseball-reference.com/leagues/AL/bat.shtml
# http://www.baseball-reference.com/leagues/NL/bat.shtml
#
# open with "csv" option (top right corner of the table),
# copy and paste into text editor or Excel, save as CSV file
#
# set working directory
# setwd("K:/data/R_the software/datatrials/baseball/RunsPerGame")
#
# read the data into a table (comma-separated, first row holds column names)
ALseason <- read.table(file = "ALseasons.csv", sep = ",", header = TRUE)
#
# The NL table may have been saved as NLseasons.csv or, as the alternate
# approach, as NLseasons.txt. Read it exactly once: the .txt copy still takes
# precedence when it exists (as it did when both reads ran unconditionally),
# and the .csv is used otherwise, so only one of the two files is required.
NLfile <- if (file.exists("NLseasons.txt")) "NLseasons.txt" else "NLseasons.csv"
NLseason <- read.table(file = NLfile, sep = ",", header = TRUE)
#
#
# RUNS SCORED PER GAME
# ====================
#
# start with American League
#
# very simple plot -- as (x, y)
plot(ALseason$Year, ALseason$R)
# as (y predicted by x)
plot(ALseason$R ~ ALseason$Year)
#
# create new object ALRunScore.LO for loess model (default span)
#  - the formula names the columns and the data frame is passed via `data`,
#    so the fitted model can also be used with predict(..., newdata = ...)
#  - na.exclude pads the fitted values with NA for any season whose value is
#    missing, so the predictions always have one entry per row of ALseason
#    and stay aligned with ALseason$Year when drawn with lines()
ALRunScore.LO <- loess(R ~ Year, data = ALseason, na.action = na.exclude)
ALRunScore.LO.predict <- predict(ALRunScore.LO)
#
# plot the data, add loess curve
ylim <- c(3, 6)
plot(ALseason$Year, ALseason$R,
     ylim = ylim,
     main = "American League: runs per team per game, 1901-2012",
     xlab = "year", ylab = "runs per game")
# chart tidying
grid()
# loess predicted value line
lines(ALseason$Year, ALRunScore.LO.predict,
      lty = "solid", col = "red", lwd = 2)
#
#
# VERSION 2 -- add "span" control to adjust smoothing
#
# references:
# http://princeofslides.blogspot.ca/2011/05/sab-r-metrics-basics-of-loess.html
# http://research.stowers-institute.org/efg/R/Statistics/loess.htm
#
# create new loess models with span=0.25 and span=0.50
# (a smaller span follows the year-to-year values more closely)
#  - columns are named in the formula and the data frame is passed via `data`
#  - na.exclude keeps one prediction per row of ALseason (NA where the season
#    value is missing), so each curve lines up with ALseason$Year
ALRunScore.LO.25 <- loess(R ~ Year, data = ALseason, span = 0.25,
                          na.action = na.exclude)
ALRunScore.LO.25.predict <- predict(ALRunScore.LO.25)
#
ALRunScore.LO.5 <- loess(R ~ Year, data = ALseason, span = 0.5,
                         na.action = na.exclude)
ALRunScore.LO.5.predict <- predict(ALRunScore.LO.5)
#
# plot the data, add the three loess curves
ylim <- c(3, 6)
plot(ALseason$Year, ALseason$R,
     ylim = ylim,
     main = "American League: runs per team per game, 1901-2012",
     xlab = "year", ylab = "runs per game")
# loess predicted value lines: default span first, then the two narrower spans
lines(ALseason$Year, ALRunScore.LO.predict,
      lty = "solid", col = "red", lwd = 2)
lines(ALseason$Year, ALRunScore.LO.25.predict,
      lty = "dashed", col = "blue", lwd = 2)
lines(ALseason$Year, ALRunScore.LO.5.predict,
      lty = "dotdash", col = "black", lwd = 2)
# chart tidying -- legend entries are listed in the same order as the lines
legend(1980, 3.5,
       c("default", "span=0.25", "span=0.50"),
       lty = c("solid", "dashed", "dotdash"),
       col = c("red", "blue", "black"),
       lwd = c(2, 2, 2))
grid()
#
#
#
#-N-N-N-N-N-N-N-N-N-N-N-N-N-N
#
# NATIONAL LEAGUE
#
# create new object NLRunScore.LO for loess model (default span)
#  - the formula names the columns and the data frame is passed via `data`,
#    so the fitted model can also be used with predict(..., newdata = ...)
#  - na.exclude pads the fitted values with NA for any season whose value is
#    missing, so the predictions always have one entry per row of NLseason
#    and stay aligned with NLseason$Year when drawn with lines()
NLRunScore.LO <- loess(R ~ Year, data = NLseason, na.action = na.exclude)
NLRunScore.LO.predict <- predict(NLRunScore.LO)
#
# plot the data (triangles), add loess curve
ylim <- c(3, 6)
plot(NLseason$Year, NLseason$R,
     pch = 2, col = "black",
     ylim = ylim,
     main = "National League: runs per team per game, 1901-2012",
     xlab = "year", ylab = "runs per game")
# loess predicted value line
lines(NLseason$Year, NLRunScore.LO.predict,
      lty = "solid", col = "blue", lwd = 2)
# chart tidying
grid()
#
#
# VERSION 2 -- add "span" control to adjust smoothing
#
# reference: http://research.stowers-institute.org/efg/R/Statistics/loess.htm
#
# create new loess models with span=0.25 and span=0.50
#  - columns are named in the formula and the data frame is passed via `data`
#  - na.exclude keeps one prediction per row of NLseason (NA where the season
#    value is missing), so each curve lines up with NLseason$Year
NLRunScore.LO.25 <- loess(R ~ Year, data = NLseason, span = 0.25,
                          na.action = na.exclude)
NLRunScore.LO.25.predict <- predict(NLRunScore.LO.25)
NLRunScore.LO.5 <- loess(R ~ Year, data = NLseason, span = 0.5,
                         na.action = na.exclude)
NLRunScore.LO.5.predict <- predict(NLRunScore.LO.5)
#
# plot the data (triangles), add the three loess curves
ylim <- c(3, 6)
plot(NLseason$Year, NLseason$R,
     pch = 2, col = "black",
     ylim = ylim,
     main = "National League: runs per team per game, 1901-2012",
     xlab = "year", ylab = "runs per game")
# loess predicted value lines: default span first, then the two narrower spans
lines(NLseason$Year, NLRunScore.LO.predict,
      lty = "solid", col = "blue", lwd = 2)
lines(NLseason$Year, NLRunScore.LO.25.predict,
      lty = "dashed", col = "red", lwd = 2)
lines(NLseason$Year, NLRunScore.LO.5.predict,
      lty = "dotdash", col = "black", lwd = 2)
# chart tidying -- legend entries are listed in the same order as the lines
legend(1980, 3.5,
       c("default", "span=0.25", "span=0.50"),
       lty = c("solid", "dashed", "dotdash"),
       col = c("blue", "red", "black"),
       lwd = c(2, 2, 2))
grid()
#
#
#
#
# MULTI-PLOT -- MERGING AL AND NL RESULTS
#
# Chart 1: every season drawn as a point, both leagues on one set of axes
ylim <- c(3, 6)
# the AL seasons set up the axes (open circles)
plot(x = ALseason$Year, y = ALseason$R,
     main = "Runs per team per game, 1901-2012",
     xlab = "year", ylab = "runs per game",
     ylim = ylim,
     type = "p", pch = 1, col = "black")
# overlay the NL seasons as blue triangles
points(x = NLseason$Year, y = NLseason$R, col = "blue", pch = 2)
# reference grid, then a key in the top-left corner
grid()
legend(x = 1900, y = 6,
       legend = c("AL", "NL"),
       pch = c(1, 2),
       col = c("black", "blue"))
#
# Chart 2: the same seasons joined up as one line per league
ylim <- c(3, 6)
# the AL line sets up the axes
plot(x = ALseason$Year, y = ALseason$R,
     main = "Runs per team per game, 1901-2012",
     xlab = "year", ylab = "runs per game",
     ylim = ylim,
     type = "l", lty = "solid", col = "red", lwd = 2)
# overlay the NL line
lines(x = NLseason$Year, y = NLseason$R, col = "blue", lty = "solid", lwd = 2)
# reference grid, then a key in the bottom-left corner
grid()
legend(x = 1900, y = 3.5,
       legend = c("AL", "NL"),
       lty = c("solid", "solid"),
       col = c("red", "blue"),
       lwd = c(2, 2))
#
#
# AL vs NL smoothed trends, span=0.25
ylim <- c(3, 6)
# the AL curve sets up the axes
plot(x = ALseason$Year, y = ALRunScore.LO.25.predict,
     main = "Runs per team per game, 1901-2012",
     xlab = "year", ylab = "runs per game",
     ylim = ylim,
     type = "l", lty = "solid", col = "red", lwd = 2)
# overlay the NL curve
lines(x = NLseason$Year, y = NLRunScore.LO.25.predict,
      col = "blue", lty = "dashed", lwd = 2)
# key in the bottom-left corner, then the reference grid
legend(x = 1900, y = 3.5,
       legend = c("AL (span=0.25)", "NL (span=0.25)"),
       col = c("red", "blue"),
       lty = c("solid", "dashed"),
       lwd = c(2, 2))
grid()
#
#
# AL vs NL smoothed trends, span=0.50
ylim <- c(3, 6)
# the AL curve sets up the axes
plot(x = ALseason$Year, y = ALRunScore.LO.5.predict,
     main = "Runs per team per game, 1901-2012",
     xlab = "year", ylab = "runs per game",
     ylim = ylim,
     type = "l", lty = "solid", col = "red", lwd = 2)
# overlay the NL curve
lines(x = NLseason$Year, y = NLRunScore.LO.5.predict,
      col = "blue", lty = "dashed", lwd = 2)
# key in the bottom-left corner, then the reference grid
legend(x = 1900, y = 3.5,
       legend = c("AL (span=0.50)", "NL (span=0.50)"),
       col = c("red", "blue"),
       lty = c("solid", "dashed"),
       lwd = c(2, 2))
grid()
#
#
#
# All four curves together: colour marks the league (red = AL, blue = NL),
# line type marks the span (solid = 0.50, dashed = 0.25)
ylim <- c(3, 6)
# the AL span=0.50 curve sets up the axes
plot(x = ALseason$Year, y = ALRunScore.LO.5.predict,
     main = "Runs per team per game, 1901-2012",
     xlab = "year", ylab = "runs per game",
     ylim = ylim,
     type = "l", lty = "solid", col = "red", lwd = 2)
# overlay the NL span=0.50 curve
lines(x = NLseason$Year, y = NLRunScore.LO.5.predict,
      col = "blue", lty = "solid", lwd = 2)
# overlay both span=0.25 curves
lines(x = ALseason$Year, y = ALRunScore.LO.25.predict,
      col = "red", lty = "dashed", lwd = 2)
lines(x = NLseason$Year, y = NLRunScore.LO.25.predict,
      col = "blue", lty = "dashed", lwd = 2)
# key in the bottom-left corner, then the reference grid
legend(x = 1900, y = 3.5,
       legend = c("AL (span=0.50)", "NL (span=0.50)",
                  "AL (span=0.25)", "NL (span=0.25)"),
       col = c("red", "blue", "red", "blue"),
       lty = c("solid", "solid", "dashed", "dashed"),
       lwd = c(2, 2, 2, 2))
grid()
#
# # # # # # # # # # # # # # # # # #
#
# calculate the difference between the two leagues (AL minus NL)
#
# Pair the seasons by Year instead of by row position: for every AL season,
# NLrow holds the row of NLseason with the same Year (NA when there is none).
# Subtracting the raw vectors would silently recycle or misalign values if
# the two tables did not contain exactly the same years in the same order.
NLrow <- match(ALseason$Year, NLseason$Year)
# the smoothed values are indexed by row below, so each prediction vector
# must hold exactly one value per season of its league
stopifnot(
  length(ALRunScore.LO.25.predict) == nrow(ALseason),
  length(NLRunScore.LO.25.predict) == nrow(NLseason)
)
# 1. absolute
RunDiff <- ALseason$R - NLseason$R[NLrow]
# 2. LOESS span=0.25
RunDiffLO <- ALRunScore.LO.25.predict - NLRunScore.LO.25.predict[NLrow]
#
# plot the LOESS difference
ylim <- c(-1, 1)
plot(ALseason$Year, RunDiffLO,
     type = "l", lty = "solid", col = "red", lwd = 2,
     main = "Run scoring trend: AL difference from NL, 1901-2012",
     ylim = ylim,
     xlab = "year", ylab = "runs per game")
# add line at zero (no difference between the leagues)
abline(h = 0, lty = "dotdash")
grid()
#
# plot each year difference as line, trend as line
ylim <- c(-1, 1.5)
plot(ALseason$Year, RunDiffLO,
     type = "l", lty = "solid", col = "red", lwd = 3,
     main = "Run scoring trend: AL difference from NL, 1901-2012",
     ylim = ylim,
     xlab = "year", ylab = "runs per game")
# add RunDiff (year-by-year) line
lines(ALseason$Year, RunDiff, lty = "solid", col = "black", lwd = 1)
# add line at zero
abline(h = 0, lty = "dotdash")
grid()
#
#
# plot each year difference as bar, trend as line
ylim <- c(-1, 1.5)
plot(ALseason$Year, RunDiff,
     type = "h", lty = "solid", col = "blue", lwd = 2,
     main = "Run scoring trend: AL difference from NL, 1901-2012",
     ylim = ylim,
     xlab = "year", ylab = "runs per game")
# add RunDiffLO (LOESS trend) line
lines(ALseason$Year, RunDiffLO, lty = "solid", col = "black", lwd = 2)
# add line at zero
abline(h = 0, lty = "dotdash")
# chart additions
grid()
legend(1900, 1.5,
       c("AL difference from NL: absolute",
         "AL difference from NL, LOESS (span=0.25)"),
       lty = c("solid", "solid"),
       col = c("blue", "black"),
       lwd = c(2, 2))
#
#
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment