Created
October 24, 2015 14:55
-
-
Save bayesball/818f800afdda4f031555 to your computer and use it in GitHub Desktop.
Daniel Murphy -- Learning about his home run ability and predicting his home run output in the 2015 World Series
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Daniel Murphy Exercise | |
| # Part I -- learning about Murphy's home run ability | |
| # and updating this knowledge after the NLDS and NLCS | |
| library(ggplot2) | |
| library(LearnBayes) | |
| # career home run data for Murphy | |
| seasons <- c(2008, 2009, 2011:2015) | |
| hr <- c(2, 12, 6, 6, 13,9, 14) | |
| ab <- c(131, 508, 391, 571, 658, 596, 499) | |
| murphy <- data.frame(seasons, hr, ab) | |
| # fitting exchangeable model to this data | |
| fit <- laplace(betabinexch, c(1, 1), cbind(hr, ab)) | |
| K <- exp(fit$mode[2]) | |
| eta <- exp(fit$mode[1]) / (1 + exp(fit$mode[1])) | |
| # use the estimated random effects distribution as my initial prior for P | |
| new.post <- c(K * eta, K * (1 - eta)) # beta shape parameters | |
| # predictive simulation -- simulate [P] [Y | P] | |
| # Y = number of home runs hit in 21 AB in NLDS | |
| p.new <- rbeta(10000, new.post[1], new.post[2]) | |
| y.new <- rbinom(10000, size=21, prob=p.new) | |
| ggplot(data.frame(P=p.new), aes(P)) + geom_density() + | |
| ggtitle("MY PRIOR") | |
| ggplot(data.frame(Y=y.new), aes(Y)) + geom_bar() + | |
| geom_vline(xintercept = 3, color="red", size=3) + | |
| ggtitle("PREDICTIVE DENSITY FOR NUMBER OF HR IN NLDS") | |
| # we now observe HR data from first round - 3 out of 21 | |
| # update my prior | |
| new.post2 <- new.post + c(3, 21 - 3) | |
| # predictive simulation | |
| # Y = number of home runs hit in 17 AB in NLCS | |
| p.new2 <- rbeta(10000, new.post2[1], new.post2[2]) | |
| y.new2 <- rbinom(10000, size=17, prob=p.new2) | |
| # graph comparing initial prior and updated prior | |
| dp <- rbind(data.frame(Type="Prior", P=p.new), | |
| data.frame(Type="New Prior", P=p.new2)) | |
| ggplot(dp, aes(P, color=Type)) + geom_density() | |
| ggplot(data.frame(Y=y.new), aes(Y)) + geom_bar() + | |
| geom_vline(xintercept = 4, color="red", size=3) + | |
| ggtitle("PREDICTIVE DENSITY FOR NUMBER OF HR IN NLCS") | |
| # Part II - predicting number of HR for 2015 World Series | |
| # update again to get my prior after the results of the two playoffs | |
| new.post3 <- new.post2 + c(4, 17-4) | |
| curve(dbeta(x, new.post3[1], new.post3[2]), 0, 0.07, | |
| xlab="P", ylab="Density", main="My Prior at Beginning of 2015 WS") | |
| # construct a prior on number of games and number of at-bats | |
| # Here is data I used to find the distribution of Murphy's game AB | |
| #library(XML) | |
| #d <- readHTMLTable('http://www.baseball-reference.com/players/gl.cgi?id=murphda08&t=b&year=2015') | |
| #gamelogs <- d[[3]] | |
| atbats <- c(3, 4, 5) | |
| atbats.probs <- c(16, 79, 19) / (16 + 79 + 19) | |
| # Here is the distribution of the length of the WS assuming | |
| # teams of equal ability | |
| length.series <- 4:7 | |
| length.series.probs <- c(2 * .5 ^ 4, 2 * 4 * .5 ^ 5, | |
| 2 * 10 * .5 ^ 6, 2 * 20 * .5 ^ 7) | |
| # do 10,000 simulations of predictive AB | |
| r.length.series <- sample(length.series, size=10000, | |
| prob=length.series.probs, | |
| replace=TRUE) | |
| get.ab <- function(j) | |
| sum(sample(atbats, size=r.length.series[j], | |
| prob=atbats.probs, replace=TRUE)) | |
| r.atbats <- sapply(1:1000, get.ab) | |
| # ready to simulate home runs | |
| p.new3 <- rbeta(10000, new.post3[1], new.post3[2]) | |
| y <- rbinom(10000, size=r.atbats, prob=p.new3) | |
| # tabulate and graph the predictive distribution | |
| # for the number of home runs | |
| TB <- round(table(y) / 10000, 3) | |
| TB | |
| K <- max(as.numeric(names(TB))) | |
| O <- data.frame(Number_of_Homeruns=0:K, | |
| Probability=round(as.numeric(table(y)) / 10000, 3)) | |
| qplot(Number_of_Homeruns, Probability, | |
| data = O, geom = "segment", | |
| yend = 0, xend = Number_of_Homeruns, size = I(10)) + | |
| ggtitle("Predictive Distribution of Number of Murphy Home Runs in WS") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment