bayesball · October 24, 2015 14:55
diff --git a/murphywork.R b/murphywork.R
 # Daniel Murphy Exercise

 # Part I -- learning about Murphy's home run ability
 # and updating this knowledge after the NLDS and NLCS

 library(ggplot2)
 library(LearnBayes)

 # career home run data for Murphy 

 seasons <- c(2008, 2009, 2011:2015)
 hr <- c(2, 12, 6, 6, 13,9, 14)
 ab <- c(131, 508, 391, 571, 658, 596, 499)
 murphy <- data.frame(seasons, hr, ab)

 # fitting exchangeable model to this data

 fit <- laplace(betabinexch, c(1, 1), cbind(hr, ab))
 K <- exp(fit$mode[2])
 eta <- exp(fit$mode[1]) / (1 + exp(fit$mode[1]))

 # use the estimated random effects distribution as my initial prior for P

 new.post <- c(K * eta, K * (1 - eta))  # beta shape parameters

 # predictive simulation -- simulate [P] [Y | P]
 # Y = number of home runs hit in 21 AB in NLDS

 p.new <- rbeta(10000, new.post[1], new.post[2])
 y.new <- rbinom(10000, size=21, prob=p.new)

 ggplot(data.frame(P=p.new), aes(P)) + geom_density() + 
  ggtitle("MY PRIOR")

 ggplot(data.frame(Y=y.new), aes(Y)) + geom_bar() +
  geom_vline(xintercept = 3, color="red", size=3) +
  ggtitle("PREDICTIVE DENSITY FOR NUMBER OF HR IN NLDS") 

 # we now observe HR data from first round - 3 out of 21

 # update my prior

 new.post2 <- new.post + c(3, 21 - 3)

 # predictive simulation
 # Y = number of home runs hit in 17 AB in NLCS

 p.new2 <- rbeta(10000, new.post2[1], new.post2[2])
 y.new2 <- rbinom(10000, size=17, prob=p.new2)

 # graph comparing initial prior and updated prior

 dp <- rbind(data.frame(Type="Prior", P=p.new),
            data.frame(Type="New Prior", P=p.new2))

 ggplot(dp, aes(P, color=Type)) + geom_density()

 ggplot(data.frame(Y=y.new), aes(Y)) + geom_bar() +
  geom_vline(xintercept = 4, color="red", size=3) +
  ggtitle("PREDICTIVE DENSITY FOR NUMBER OF HR IN NLCS")

 # Part II - predicting number of HR for 2015 World Series

 # update again to get my prior after the results of the two playoffs

 new.post3 <- new.post2 + c(4, 17-4)

 curve(dbeta(x, new.post3[1], new.post3[2]), 0, 0.07,
      xlab="P", ylab="Density", main="My Prior at Beginning of 2015 WS")

 # construct a prior on number of games and number of at-bats

 # Here is data I used to find the distribution of Murphy's game AB
 #library(XML)
 #d <- readHTMLTable('http://www.baseball-reference.com/players/gl.cgi?id=murphda08&t=b&year=2015')
 #gamelogs <- d[[3]]

 atbats <- c(3, 4, 5)
 atbats.probs <- c(16, 79, 19) / (16 + 79 + 19)

 # Here is the distribution of the length of the WS assuming
 # teams of equal ability

 length.series <- 4:7
 length.series.probs <- c(2 * .5 ^ 4, 2 * 4 * .5 ^ 5, 
                         2 * 10 * .5 ^ 6, 2 * 20 * .5 ^ 7)

 # do 10,000 simulations of predictive AB

 r.length.series <- sample(length.series, size=10000, 
                          prob=length.series.probs, 
                          replace=TRUE)
 get.ab <- function(j)
     sum(sample(atbats, size=r.length.series[j], 
              prob=atbats.probs, replace=TRUE))

 r.atbats <- sapply(1:1000, get.ab)

 # ready to simulate home runs

 p.new3 <- rbeta(10000, new.post3[1], new.post3[2])
 y <- rbinom(10000, size=r.atbats, prob=p.new3)

 # tabulate and graph the predictive distribution 
 # for the number of home runs

 TB <- round(table(y) / 10000, 3)
 TB
 K <- max(as.numeric(names(TB)))
 O <- data.frame(Number_of_Homeruns=0:K,
            Probability=round(as.numeric(table(y)) / 10000, 3))

 qplot(Number_of_Homeruns, Probability, 
      data = O, geom = "segment",
      yend = 0, xend = Number_of_Homeruns, size = I(10)) +
      ggtitle("Predictive Distribution of Number of Murphy Home Runs in WS")
	# Daniel Murphy Exercise

	# Part I -- learning about Murphy's home run ability
	# and updating this knowledge after the NLDS and NLCS

	library(ggplot2)
	library(LearnBayes)

	# career home run data for Murphy

	seasons <- c(2008, 2009, 2011:2015)
	hr <- c(2, 12, 6, 6, 13,9, 14)
	ab <- c(131, 508, 391, 571, 658, 596, 499)
	murphy <- data.frame(seasons, hr, ab)

	# fitting exchangeable model to this data

	fit <- laplace(betabinexch, c(1, 1), cbind(hr, ab))
	K <- exp(fit$mode[2])
	eta <- exp(fit$mode[1]) / (1 + exp(fit$mode[1]))

	# use the estimated random effects distribution as my initial prior for P

	new.post <- c(K * eta, K * (1 - eta)) # beta shape parameters

	# predictive simulation -- simulate [P] [Y \| P]
	# Y = number of home runs hit in 21 AB in NLDS

	p.new <- rbeta(10000, new.post[1], new.post[2])
	y.new <- rbinom(10000, size=21, prob=p.new)

	ggplot(data.frame(P=p.new), aes(P)) + geom_density() +
	ggtitle("MY PRIOR")

	ggplot(data.frame(Y=y.new), aes(Y)) + geom_bar() +
	geom_vline(xintercept = 3, color="red", size=3) +
	ggtitle("PREDICTIVE DENSITY FOR NUMBER OF HR IN NLDS")

	# we now observe HR data from first round - 3 out of 21

	# update my prior

	new.post2 <- new.post + c(3, 21 - 3)

	# predictive simulation
	# Y = number of home runs hit in 17 AB in NLCS

	p.new2 <- rbeta(10000, new.post2[1], new.post2[2])
	y.new2 <- rbinom(10000, size=17, prob=p.new2)

	# graph comparing initial prior and updated prior

	dp <- rbind(data.frame(Type="Prior", P=p.new),
	data.frame(Type="New Prior", P=p.new2))

	ggplot(dp, aes(P, color=Type)) + geom_density()

	ggplot(data.frame(Y=y.new), aes(Y)) + geom_bar() +
	geom_vline(xintercept = 4, color="red", size=3) +
	ggtitle("PREDICTIVE DENSITY FOR NUMBER OF HR IN NLCS")

	# Part II - predicting number of HR for 2015 World Series

	# update again to get my prior after the results of the two playoffs

	new.post3 <- new.post2 + c(4, 17-4)

	curve(dbeta(x, new.post3[1], new.post3[2]), 0, 0.07,
	xlab="P", ylab="Density", main="My Prior at Beginning of 2015 WS")

	# construct a prior on number of games and number of at-bats

	# Here is data I used to find the distribution of Murphy's game AB
	#library(XML)
	#d <- readHTMLTable('http://www.baseball-reference.com/players/gl.cgi?id=murphda08&t=b&year=2015')
	#gamelogs <- d[[3]]

	atbats <- c(3, 4, 5)
	atbats.probs <- c(16, 79, 19) / (16 + 79 + 19)

	# Here is the distribution of the length of the WS assuming
	# teams of equal ability

	length.series <- 4:7
	length.series.probs <- c(2 * .5 ^ 4, 2 * 4 * .5 ^ 5,
	2 * 10 * .5 ^ 6, 2 * 20 * .5 ^ 7)

	# do 10,000 simulations of predictive AB

	r.length.series <- sample(length.series, size=10000,
	prob=length.series.probs,
	replace=TRUE)
	get.ab <- function(j)
	sum(sample(atbats, size=r.length.series[j],
	prob=atbats.probs, replace=TRUE))

	r.atbats <- sapply(1:1000, get.ab)

	# ready to simulate home runs

	p.new3 <- rbeta(10000, new.post3[1], new.post3[2])
	y <- rbinom(10000, size=r.atbats, prob=p.new3)

	# tabulate and graph the predictive distribution
	# for the number of home runs

	TB <- round(table(y) / 10000, 3)
	TB
	K <- max(as.numeric(names(TB)))
	O <- data.frame(Number_of_Homeruns=0:K,
	Probability=round(as.numeric(table(y)) / 10000, 3))

	qplot(Number_of_Homeruns, Probability,
	data = O, geom = "segment",
	yend = 0, xend = Number_of_Homeruns, size = I(10)) +
	ggtitle("Predictive Distribution of Number of Murphy Home Runs in WS")
No results found