Last active
May 20, 2023 18:28
-
-
Save bayesball/dfd557feb4b86331f8de15f90e675c54 to your computer and use it in GitHub Desktop.
R function to fit a multilevel quadratic smoothing model to season-to-season AVG data for any player of interest.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
player_function_lb <- function(player_id){ | |
# uses LearnBayes package to simulate from | |
# multilevel model | |
library(dplyr) | |
library(ggplot2) | |
library(Lahman) | |
library(LearnBayes) | |
increasefont <- function(Size = 18){ | |
theme(text = element_text(size = Size)) | |
} | |
centertitle <- function (Color = "blue"){ | |
theme(plot.title = element_text(colour = Color, size = 18, | |
hjust = 0.5, vjust = 0.8, angle = 0)) | |
} | |
# some data work | |
People %>% | |
filter(playerID == player_id) %>% | |
mutate(player_name = paste(nameFirst, nameLast)) %>% | |
pull(player_name) -> player_name | |
Batting %>% | |
filter(playerID == player_id) %>% | |
group_by(yearID) %>% | |
summarize(H = sum(H), | |
AB = sum(AB)) %>% | |
mutate(AVG = H / AB, | |
year = yearID - min(yearID) + 1) %>% | |
select(yearID, year, AB, H, AVG) -> d | |
# model description -- definition of logpost of hyperparameters | |
qlogpost <- function (theta, data) { | |
invlogit <- function(x) exp(x) / (1 + exp(x)) | |
K <- exp(theta[4]) | |
y <- data[, 1] | |
n <- data[, 2] | |
x <- data[, 3] | |
eta <- invlogit(theta[1] + theta[2] * x + theta[3] * x ^ 2) | |
logf <- function(y, n, K, eta){ lbeta(K * eta + y, | |
K * (1 - eta) + n - y) - | |
lbeta(K * eta, K * (1 - eta)) | |
} | |
val <- sum(logf(y, n, K, eta)) | |
val <- val + theta[4] - 2 * log(1 + exp(theta[4])) | |
return(val) | |
} | |
# fit using Laplace approximation | |
fit <- laplace(qlogpost, c(0, 0, 0, 0), | |
as.matrix(d[, c("H", "AB", "year")])) | |
# compute posterior means of probs | |
post_mean <- function(j){ | |
invlogit <- function(x) exp(x) / (1 + exp(x)) | |
lp <- fit$mode[1] + fit$mode[2] * d$year[j] + | |
fit$mode[3] * d$year[j] ^ 2 | |
m <- mean(invlogit(lp)) | |
K <- exp(fit$mode[4]) | |
(d$H[j] + m * K) / (d$AB[j] + K) | |
} | |
indices <- 1:dim(d)[1] | |
pmeans <- sapply(indices, post_mean) | |
# quadratic fit | |
qfit <- lm(AVG ~ yearID + I(yearID ^ 2), | |
data = d) | |
b <- coef(qfit) | |
d1 <- select(d, yearID) %>% | |
mutate(AVG = b[1] + b[2] * yearID + b[3] * yearID ^ 2, | |
Type = "Quadratic Fit") -> d1 | |
d2 <- select(d, yearID, AVG) %>% | |
mutate(Type = "Observed") | |
d3 <- data.frame(yearID = d$yearID, | |
post_means = pmeans) %>% | |
mutate(AVG = post_means, | |
Type = "Multilevel") %>% | |
select(yearID, AVG, Type) | |
d123 <- rbind(d1, d2, d3) %>% | |
mutate(Player = player_name) | |
plot1 <- ggplot(d123, aes(yearID, AVG, color = Type)) + | |
geom_point(size = 3) + | |
geom_line(linewidth = 0.8, | |
linetype = 2) + | |
increasefont() + | |
centertitle() + | |
ggtitle(paste(player_name, "Batting Averages")) + | |
xlab("Season") + | |
ylab("PROB Estimate") | |
# simulate from posterior of hyperparameters | |
sim_h <- rmnorm(5000, fit$mode, fit$var) | |
# simulate from posterior of pj | |
sim_pj <- function(j){ | |
invlogit <- function(x) exp(x) / (1 + exp(x)) | |
lp <- sim_h[, 1] + sim_h[, 2] * d$year[j] + | |
sim_h[, 3] * d$year[j] ^ 2 | |
m <- invlogit(lp) | |
K <- exp(sim_h[, 4]) | |
rbeta(5000, d$H[j] + K * m , | |
d$AB[j] - d$H[j] + K * (1 - m)) | |
} | |
# collect simulations from max p | |
indices <- 1:dim(d)[1] | |
out <- sapply(indices, sim_pj) | |
maxp <- apply(out, 1, max) | |
newdf <- data.frame(Max_P = maxp) %>% | |
mutate(Player = player_name) | |
plot2 <- ggplot(newdf, aes(Max_P)) + | |
geom_density(linewidth = 1.5, color = "red") + | |
increasefont() + | |
centertitle() + | |
ylab("Density") + | |
ggtitle(paste(player_name, | |
"Posterior of Maximum Probability")) | |
list(d123 = d123, | |
newdf = newdf, | |
plot1 = plot1, | |
plot2 = plot2) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment