Skip to content

Instantly share code, notes, and snippets.

@bayesball
Last active May 20, 2023 18:28
Show Gist options
  • Save bayesball/dfd557feb4b86331f8de15f90e675c54 to your computer and use it in GitHub Desktop.
Save bayesball/dfd557feb4b86331f8de15f90e675c54 to your computer and use it in GitHub Desktop.
R function to fit a multilevel quadratic smoothing model to season-to-season AVG data for any player of interest.
player_function_lb <- function(player_id){
# uses LearnBayes package to simulate from
# multilevel model
library(dplyr)
library(ggplot2)
library(Lahman)
library(LearnBayes)
increasefont <- function(Size = 18){
theme(text = element_text(size = Size))
}
centertitle <- function (Color = "blue"){
theme(plot.title = element_text(colour = Color, size = 18,
hjust = 0.5, vjust = 0.8, angle = 0))
}
# some data work
People %>%
filter(playerID == player_id) %>%
mutate(player_name = paste(nameFirst, nameLast)) %>%
pull(player_name) -> player_name
Batting %>%
filter(playerID == player_id) %>%
group_by(yearID) %>%
summarize(H = sum(H),
AB = sum(AB)) %>%
mutate(AVG = H / AB,
year = yearID - min(yearID) + 1) %>%
select(yearID, year, AB, H, AVG) -> d
# model description -- definition of logpost of hyperparameters
qlogpost <- function (theta, data) {
invlogit <- function(x) exp(x) / (1 + exp(x))
K <- exp(theta[4])
y <- data[, 1]
n <- data[, 2]
x <- data[, 3]
eta <- invlogit(theta[1] + theta[2] * x + theta[3] * x ^ 2)
logf <- function(y, n, K, eta){ lbeta(K * eta + y,
K * (1 - eta) + n - y) -
lbeta(K * eta, K * (1 - eta))
}
val <- sum(logf(y, n, K, eta))
val <- val + theta[4] - 2 * log(1 + exp(theta[4]))
return(val)
}
# fit using Laplace approximation
fit <- laplace(qlogpost, c(0, 0, 0, 0),
as.matrix(d[, c("H", "AB", "year")]))
# compute posterior means of probs
post_mean <- function(j){
invlogit <- function(x) exp(x) / (1 + exp(x))
lp <- fit$mode[1] + fit$mode[2] * d$year[j] +
fit$mode[3] * d$year[j] ^ 2
m <- mean(invlogit(lp))
K <- exp(fit$mode[4])
(d$H[j] + m * K) / (d$AB[j] + K)
}
indices <- 1:dim(d)[1]
pmeans <- sapply(indices, post_mean)
# quadratic fit
qfit <- lm(AVG ~ yearID + I(yearID ^ 2),
data = d)
b <- coef(qfit)
d1 <- select(d, yearID) %>%
mutate(AVG = b[1] + b[2] * yearID + b[3] * yearID ^ 2,
Type = "Quadratic Fit") -> d1
d2 <- select(d, yearID, AVG) %>%
mutate(Type = "Observed")
d3 <- data.frame(yearID = d$yearID,
post_means = pmeans) %>%
mutate(AVG = post_means,
Type = "Multilevel") %>%
select(yearID, AVG, Type)
d123 <- rbind(d1, d2, d3) %>%
mutate(Player = player_name)
plot1 <- ggplot(d123, aes(yearID, AVG, color = Type)) +
geom_point(size = 3) +
geom_line(linewidth = 0.8,
linetype = 2) +
increasefont() +
centertitle() +
ggtitle(paste(player_name, "Batting Averages")) +
xlab("Season") +
ylab("PROB Estimate")
# simulate from posterior of hyperparameters
sim_h <- rmnorm(5000, fit$mode, fit$var)
# simulate from posterior of pj
sim_pj <- function(j){
invlogit <- function(x) exp(x) / (1 + exp(x))
lp <- sim_h[, 1] + sim_h[, 2] * d$year[j] +
sim_h[, 3] * d$year[j] ^ 2
m <- invlogit(lp)
K <- exp(sim_h[, 4])
rbeta(5000, d$H[j] + K * m ,
d$AB[j] - d$H[j] + K * (1 - m))
}
# collect simulations from max p
indices <- 1:dim(d)[1]
out <- sapply(indices, sim_pj)
maxp <- apply(out, 1, max)
newdf <- data.frame(Max_P = maxp) %>%
mutate(Player = player_name)
plot2 <- ggplot(newdf, aes(Max_P)) +
geom_density(linewidth = 1.5, color = "red") +
increasefont() +
centertitle() +
ylab("Density") +
ggtitle(paste(player_name,
"Posterior of Maximum Probability"))
list(d123 = d123,
newdf = newdf,
plot1 = plot1,
plot2 = plot2)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment