Created
October 10, 2024 01:41
-
-
Save bayesball/7a8b3517f8d05aaffcd0c4e86bd9f607 to your computer and use it in GitHub Desktop.
R function to compute expected runs for each bases/outs state from an event table obtained from statsapi.mlb.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Compute expected runs for each bases/outs state
#'
#' For every event, finds the runs scored from that event through the end of
#' its half-inning, then averages those values within each combination of
#' base occupancy and outs before the event.
#'
#' Only half-innings whose out changes sum to 3 are used, and only events
#' that changed the bases/outs state or scored at least one run.
#'
#' @param events Event table obtained via statsapi.mlb.com. Must contain the
#'   columns `game_id`, `inning`, `half_inning`, `runs_on_event`, `pre_outs`,
#'   `post_outs`, `pre_runner_1b_id`, `pre_runner_2b_id`, `pre_runner_3b_id`,
#'   `post_runner_1b_id`, `post_runner_2b_id` and `post_runner_3b_id`.
#'   Rows are assumed to be in chronological order within each half-inning,
#'   because the running run total is accumulated in row order.
#' @return A data frame with one row per observed (`bases`, `outs`) pair and
#'   the columns `bases` (three-character string of 0/1 flags for first,
#'   second and third base), `outs` (outs before the event) and
#'   `ExpectedRuns` (mean runs scored through the rest of the half-inning).
compute_expected_runs <- function(events) {
  # library() errors when dplyr is missing; require() would only return FALSE
  # and let the function fail later with a less helpful message.
  library(dplyr)

  required_cols <- c(
    "game_id", "inning", "half_inning",
    "runs_on_event", "pre_outs", "post_outs",
    "pre_runner_1b_id", "pre_runner_2b_id", "pre_runner_3b_id",
    "post_runner_1b_id", "post_runner_2b_id", "post_runner_3b_id"
  )
  stopifnot(is.data.frame(events))
  missing_cols <- setdiff(required_cols, names(events))
  if (length(missing_cols) > 0) {
    stop(
      "`events` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # total runs scored and number of outs recorded in each half-inning
  total_runs <- events |>
    group_by(game_id, inning, half_inning) |>
    summarize(
      max_runs = sum(runs_on_event),
      total_outs = sum(post_outs - pre_outs),
      .groups = "drop"
    )

  events <- events |>
    # base occupancy before/after the event as a string of 0/1 flags
    # (a runner id of NA means the base is empty), plus the bases/outs state
    mutate(
      bases = paste0(
        ifelse(is.na(pre_runner_1b_id), 0, 1),
        ifelse(is.na(pre_runner_2b_id), 0, 1),
        ifelse(is.na(pre_runner_3b_id), 0, 1)
      ),
      new_bases = paste0(
        ifelse(is.na(post_runner_1b_id), 0, 1),
        ifelse(is.na(post_runner_2b_id), 0, 1),
        ifelse(is.na(post_runner_3b_id), 0, 1)
      ),
      state = paste(bases, pre_outs),
      new_state = paste(new_bases, post_outs)
    ) |>
    # runs scored in the half-inning before or at each event
    group_by(game_id, inning, half_inning) |>
    mutate(cum_runs = cumsum(runs_on_event)) |>
    ungroup()

  events |>
    inner_join(total_runs, by = c("game_id", "inning", "half_inning")) |>
    # runs in the remainder of the half-inning, counting the current event:
    # cum_runs already includes runs_on_event, so it is added back
    mutate(runs_roi = max_runs - cum_runs + runs_on_event) |>
    rename(outs = pre_outs) |>
    # keep events that changed the state or scored a run ...
    filter(state != new_state | runs_on_event > 0) |>
    # ... and only half-innings where total outs is 3
    filter(total_outs == 3) |>
    # run expectancy for each bases/outs combination
    group_by(bases, outs) |>
    summarize(ExpectedRuns = mean(runs_roi), .groups = "drop")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment