Created
March 26, 2024 19:43
-
-
Save bayesball/e7d56e5edf31d7a71c6bf40cc1dfe743 to your computer and use it in GitHub Desktop.
Quarto file that collects retrosheet data and compares count rates for two seasons
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Retrosheet Package - Comparing Count Rates for Two Seasons" | |
format: html | |
editor: visual | |
--- | |
Load packages for this particular run. | |
```{r} | |
#| message: FALSE | |
library(abdwr3edata) | |
library(tidyr) | |
library(dplyr) | |
library(purrr) | |
library(retrosheet) | |
library(ggplot2) | |
``` | |
Function to collect Retrosheet play-by-play files for a particular season. For every team it retrieves the play files, tags each play with its game id, and stacks all games into a single data frame.
```{r} | |
#' Collect Retrosheet play-by-play data for one season.
#'
#' Looks up the season's rosters to obtain the team codes, retrieves the
#' play files for every team, and stacks the plays of all games into a
#' single data frame.
#'
#' @param season Season (year) passed through to `get_retrosheet()`.
#' @return A data frame with one row per play, carrying a `GAMEID` column
#'   and `count` stored as character.
get_my_retrosheet <- function(season) {
  # The names of the roster list serve as the team codes to iterate over.
  rosters <- get_retrosheet("roster", season)
  teams <- names(rosters)

  one_team_retrieve <- function(team) {
    games <- get_retrosheet("play", season, team)

    one_game <- function(game) {
      game_id <- unlist(game$id)[1]
      game$play |>
        mutate(
          GAMEID = game_id,
          # presumably coerced so every game has the same column type
          # when the pieces are row-bound -- TODO confirm
          count = as.character(count)
        )
    }

    # Mapping over the list itself (rather than 1:length) is safe when a
    # team has no games.
    games |>
      map(one_game) |>
      list_rbind()
  }

  # Last expression is the (visible) return value.
  teams |>
    map(one_team_retrieve) |>
    list_rbind()
}
``` | |
This function adds count indicator variables to a Retrosheet play-by-play data frame and summarizes, for each possible count, the percentage of plate appearances (relative to the total at the 0-0 count) that pass through that count.
```{r} | |
#' Percentage of plate appearances reaching each ball-strike count.
#'
#' Renames `pitches` to `pitch_seq_tx`, lets `retrosheet_add_counts()` add
#' the count columns, totals them, and expresses every count other than
#' 0-0 as a percentage of the 0-0 total.
#'
#' @param d Play-by-play data frame with a `pitches` column.
#' @return A data frame with columns `P10`, `P01`, `P20`, `P11`, `P02`,
#'   `P30`, `P21`, `P12`, `P31`, `P22`, `P32`.
add_count_variables <- function(d) {
  # Counts after the first pitch, in the order of the output columns.
  later_counts <- c(
    "c10", "c01", "c20", "c11", "c02",
    "c30", "c21", "c12", "c31", "c22", "c32"
  )

  totals <- d |>
    rename(pitch_seq_tx = pitches) |>
    retrosheet_add_counts() |>
    summarize(across(all_of(c("c00", later_counts)), sum))

  totals |>
    # c00 is left untouched, so it is a stable denominator for every column.
    mutate(across(all_of(later_counts), \(n) 100 * n / c00)) |>
    select(all_of(later_counts)) |>
    rename_with(\(nm) sub("^c", "P", nm))
}
``` | |
Collects Retrosheet data for the 1995 and 2023 seasons. | |
```{r} | |
# Build the play-by-play data frame for each season being compared.
d_1995 <- get_my_retrosheet(season = 1995)
d_2023 <- get_my_retrosheet(season = 2023)
``` | |
Here's what this data looks like: | |
```{r} | |
# Show the first rows of the 2023 play-by-play data.
d_2023 |>
  head()
``` | |
Summarizes the count data for each of the two seasons. | |
```{r} | |
# Compute and display the count percentages, 2023 first and then 1995.
S_2023 <- add_count_variables(d_2023)
S_2023
S_1995 <- add_count_variables(d_1995)
S_1995
``` | |
The objective is to compare the count percentages for the two seasons. Puts data in a different "long" form so we can compare the percentages for the two seasons using logits. | |
```{r} | |
# Log-odds of a percentage: y is on the 0-100 scale and is converted to a
# proportion before taking log(p) - log(1 - p).
logit <- function(y) {
  p <- y / 100
  log(p) - log(1 - p)
}
# Reshape each one-row season summary to long form: one row per count,
# with the column name in `Count` and its percentage in `Pct`.
T_1995 <- S_1995 |>
  pivot_longer(
    cols = starts_with("P"),
    names_to = "Count",
    values_to = "Pct"
  )
T_2023 <- S_2023 |>
  pivot_longer(
    cols = starts_with("P"),
    names_to = "Count",
    values_to = "Pct"
  )

# After the join, Pct.x holds the 1995 value and Pct.y the 2023 value.
T12 <- T_1995 |>
  inner_join(T_2023, by = "Count") |>
  mutate(
    # "P10" -> "1-0"; the factor levels fix the order of the counts.
    Count = factor(
      paste(substr(Count, 2, 2), substr(Count, 3, 3), sep = "-"),
      levels = c(
        "0-1", "1-0",
        "0-2", "1-1", "2-0",
        "1-2", "2-1", "3-0",
        "2-2", "3-1",
        "3-2"
      )
    ),
    Logit_Change = logit(Pct.y) - logit(Pct.x)
  )
``` | |
Constructs the graph of the logit differences for all counts. | |
```{r} | |
# Dot plot of the 2023-minus-1995 logit change for every count; the red
# vertical line at zero marks "no change" between the two seasons.
ggplot(T12, aes(Logit_Change, Count)) +
  geom_vline(xintercept = 0, linewidth = 1.5, color = "red") +
  geom_point(size = 4) +
  ggtitle("Change in Logits from 1995 to 2023") +
  # Axis label corrected: the baseline season is 1995, not 1985.
  xlab("Logit 2023 Minus Logit 1995") +
  theme(
    text = element_text(size = 18),
    plot.title = element_text(
      colour = "blue", size = 18,
      hjust = 0.5, vjust = 0.8, angle = 0
    )
  )
``` | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment