Skip to content

Instantly share code, notes, and snippets.

@bayesball
Created April 6, 2024 15:29
Show Gist options
  • Save bayesball/8ec179cd3b7b86bd849f047e07362680 to your computer and use it in GitHub Desktop.
Save bayesball/8ec179cd3b7b86bd849f047e07362680 to your computer and use it in GitHub Desktop.
Illustration of two algorithms for predicting home runs from distance and spray angle
# load some packages
library(dplyr)
library(ggplot2)
library(mgcv)
library(janitor)
library(metR)
# have Statcast data through games of April 5, 2024
statcast2024 |> filter(hit_distance_sc >= 300,
type == "X") |>
mutate(HR = ifelse(events == "home_run",
1, 0),
location_x = 2.5 * (hc_x - 125.42),
location_y = 2.5 * (198.27 - hc_y),
spray_angle = atan(location_x / location_y) * 180 / pi
) -> sc_ip_300
ii <- is.na(sc_ip_300$spray_angle)
sc_ip_300 <- sc_ip_300[!ii, ]
# fit logistic model
lm_fit <- glm(HR ~ hit_distance_sc +
spray_angle + I(spray_angle ^ 2),
family = binomial,
data = sc_ip_300)
coef(lm_fit)
# plot fit on a grid
grid <- expand.grid(hit_distance_sc =
seq(300, 450, length = 50),
spray_angle = seq(-45, 45, length = 50))
grid$lp <- predict(lm_fit, grid, type = "response")
ggplot(grid, aes(x = spray_angle, y = hit_distance_sc,
z = lp)) +
geom_contour_fill(breaks = seq(0, 1, by = .05)) +
scale_fill_distiller(palette="Spectral") +
theme(text=element_text(size=18)) +
ggtitle("Contours of P(HR) from Linear Model") +
theme(plot.title = element_text(colour = "blue", size = 18,
hjust = 0.5, vjust = 0.8, angle = 0))
## observe some interesting patterns in HR counts
sc_ip_300$SA <- cut(sc_ip_300$spray_angle,
breaks = seq(-50, 50, by = 5))
tabyl(sc_ip_300, SA, HR)
## one posterior predictive simulation
mn <- lm_fit$coefficients
v <- vcov(lm_fit)
X <- as.matrix(data.frame(1, lm_fit$model[, -1]))
N <- nrow(X)
invlogit <- function(y){
exp(y) / (1 + exp(y))
}
one_sim <- function(){
beta <- t(rmnorm(1, mn, v))
p <- invlogit(X %*% beta)
HR <- rbinom(N, size = 1, prob = p)
df <- data.frame(HR = HR,
SA = sc_ip_300$SA)
sd(tabyl(df, SA, HR)[[3]])
}
# run the predictive simulation 1000 times and
# display predictive plot
pred_SD <- replicate(1000, one_sim())
obs_SD <- sd(tabyl(sc_ip_300, SA, HR)[[3]])
ggplot(data.frame(Predicted = pred_SD),
aes(Predicted)) +
geom_histogram(color = "white", fill = "tan",
bins = 12) +
geom_vline(xintercept = obs_SD, color = "red",
linewidth = 2) +
theme(text=element_text(size=18)) +
ggtitle("Predictions of SD from Linear Model") +
theme(plot.title = element_text(colour = "blue", size = 18,
hjust = 0.5, vjust = 0.8, angle = 0)) +
annotate(geom = "text",
label = "OBSERVED",
x = 6.7, y = 200, size = 6, color = "red")
mean(pred_SD >= obs_SD)
# GAM fit
gam_fit <- gam(HR ~ hit_distance_sc +
s(spray_angle),
family = binomial,
data = sc_ip_300)
# display fit
grid1 <- expand.grid(hit_distance_sc = seq(300, 450, length = 50),
spray_angle = seq(-45, 45, length = 50))
grid1$lp <- predict(gam_fit, grid, type = "response")
ggplot(grid1, aes(x = spray_angle, y = hit_distance_sc,
z = lp)) +
geom_contour_fill(breaks = seq(0, 1, by = .05)) +
scale_fill_distiller(palette="Spectral") +
theme(text=element_text(size=18)) +
ggtitle("Contours of P(HR) from GAM Model") +
theme(plot.title = element_text(colour = "blue", size = 18,
hjust = 0.5, vjust = 0.8, angle = 0))
## posterior predictive simulation
PP <- predict(gam_fit, se.fit = TRUE)
N <- nrow(X)
invlogit <- function(y){
exp(y) / (1 + exp(y))
}
sc_ip_300$SA <- cut(sc_ip_300$spray_angle,
breaks = seq(-50, 50, by = 5))
tabyl(sc_ip_300, SA, HR) |> adorn_percentages()
one_sim2 <- function(){
require(LearnBayes)
require(janitor)
lp <- rnorm(N, PP$fit, PP$se.fit)
HR <- rbinom(N, size = 1, prob = invlogit(lp))
df <- data.frame(HR = HR,
SA = sc_ip_300$SA)
sd(tabyl(df, SA, HR)[[3]])
}
pred_SD <- replicate(1000, one_sim2())
hist(pred_SD)
obs_SD <- sd(tabyl(sc_ip_300, SA, HR)[[3]])
mean(pred_SD >= obs_SD)
# show two fits on one display
grid$Model <- "Logistic Fit"
grid1$Model <- "GAM Fit"
twogrids <- rbind(grid, grid1)
ggplot(twogrids, aes(x = spray_angle, y = hit_distance_sc,
z = lp)) +
geom_contour_fill(breaks = seq(0, 1, by = .05)) +
scale_fill_distiller(palette="Spectral") +
theme(text=element_text(size=18)) +
facet_wrap(~ Model) +
ylim(300, 430)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment