Created
December 2, 2024 16:27
-
-
Save k5cents/5778904c6d77c8d4067c6f41e964d922 to your computer and use it in GitHub Desktop.
A simple monte carlo simulation of Monday Night Football scores to estimate playoff outcome probabilities
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(fflr) | |
# Connect to the league | |
ffl_id(252353) | |
# Fetch league standings and live scoring data | |
s <- league_standings() %>% | |
select(abbrev, wins, pointsFor) | |
l <- live_scoring(bonusWin = TRUE) | |
match_id <- l %>% | |
select(abbrev, matchupId) | |
# Fetch team rosters and include scores for players who have already played | |
rosters <- team_roster() | |
# Separate finished player scores and remaining player projections | |
finished_scores <- rosters %>% | |
bind_rows() %>% | |
filter(!is.na(actualScore), !lineupSlot %in% c("BE", "IR")) %>% | |
group_by(abbrev) %>% | |
summarize(totalFinishedScore = sum(actualScore, na.rm = TRUE), .groups = "drop") | |
players_remaining <- rosters %>% | |
bind_rows() %>% | |
filter(is.na(actualScore), !lineupSlot %in% c("BE", "IR")) %>% | |
select(abbrev, lineupSlot, lastName, projectedScore) %>% | |
mutate(lineupSlot = as.character(lineupSlot)) | |
# Define maximum plausible scores by position | |
max_scores <- c(`QB` = 40, `RB` = 30, `WR` = 30, `TE` = 25, | |
`FLEX` = 30, `D/ST` = 15, `K` = 15) | |
# Add max plausible scores to players_remaining | |
players_remaining <- players_remaining %>% | |
mutate( | |
maxScore = max_scores[lineupSlot], | |
scaledScore = projectedScore / maxScore, # Scale to [0, 1] | |
alpha = scaledScore * 10 + 1, # Shape parameter for beta | |
beta = (1 - scaledScore) * 10 + 1 | |
) | |
# Monte Carlo simulation parameters | |
set.seed(42) # Ensure reproducibility | |
n_sims <- 1000000 | |
# Simulate outcomes for each player using the beta distribution | |
simulated_scores <- players_remaining %>% | |
crossing(sim_id = 1:n_sims) %>% | |
rowwise() %>% # Ensure row-wise operations | |
mutate( | |
betaValue = rbeta(1, alpha, beta), # Generate beta-distributed random value for each row | |
simulatedScore = betaValue * maxScore # Rescale to plausible range | |
) %>% | |
ungroup() # Remove row-wise grouping | |
simulated_scores %>% | |
ggplot(aes(simulatedScore)) + | |
geom_histogram() + | |
facet_wrap(~lastName, ncol = 1) | |
# Prepare a full list of all teams | |
all_teams <- s %>% | |
select(abbrev) %>% # Get all team abbreviations | |
left_join(finished_scores, by = "abbrev") %>% # Include finished scores | |
mutate(totalFinishedScore = replace_na(totalFinishedScore, 0)) # Set missing finished scores to 0 | |
# Add simulated scores for teams with remaining players | |
simulated_team_scores <- players_remaining %>% | |
crossing(sim_id = 1:n_sims) %>% | |
rowwise() %>% | |
mutate( | |
betaValue = rbeta(1, alpha, beta), # Generate beta-distributed random value for each row | |
simulatedScore = betaValue * maxScore # Rescale to plausible range | |
) %>% | |
ungroup() %>% # Remove row-wise grouping | |
group_by(sim_id, abbrev) %>% | |
summarize(simulatedTeamScore = sum(simulatedScore, na.rm = TRUE), .groups = "drop") | |
# Combine all teams with simulated scores | |
total_team_scores <- all_teams %>% | |
crossing(sim_id = 1:n_sims) %>% # Add simulation IDs for all teams | |
left_join(simulated_team_scores, by = c("abbrev", "sim_id")) %>% # Add simulated scores | |
mutate( | |
simulatedTeamScore = replace_na(simulatedTeamScore, 0), # Set missing simulated scores to 0 | |
totalTeamScore = totalFinishedScore + simulatedTeamScore # Combine finished and simulated scores | |
) | |
# Calculate new standings based on simulation results | |
results <- total_team_scores %>% | |
left_join(s, by = "abbrev") %>% | |
left_join(match_id, by = "abbrev") %>% | |
group_by(matchupId, sim_id) %>% # Group by matchup and simulation ID | |
mutate( | |
matchupWin = totalTeamScore == max(totalTeamScore) # Matchup win for the highest score in the matchup | |
) %>% | |
ungroup() %>% | |
mutate( | |
finalScore = pointsFor + totalTeamScore, # Add total team score to current points | |
bonusWin = totalTeamScore > median(totalTeamScore), # Top half gets a bonus win | |
newWins = wins + bonusWin + matchupWin # Add wins based on matchup and bonus | |
) %>% | |
group_by(sim_id) %>% | |
arrange(desc(newWins), desc(finalScore)) %>% | |
mutate(rank = row_number()) %>% | |
ungroup() | |
# Calculate playoff probabilities | |
playoff_probabilities <- results %>% | |
filter(rank <= 4) %>% | |
count(abbrev, name = "playoffAppearances") %>% | |
mutate(probability = playoffAppearances / n_sims) | |
playoff_probabilities | |
# ------------------------------------------------------------------------- | |
# Calculate the probability of each team's final rank | |
rank_probabilities <- results %>% | |
group_by(abbrev, rank) %>% | |
summarize( | |
count = n(), # Count occurrences of each rank for each team | |
.groups = "drop" | |
) %>% | |
mutate(probability = count / n_sims) %>% | |
arrange(abbrev, rank) | |
rank_probabilities %>% | |
mutate(lbl = scales::percent(probability, 1)) %>% | |
arrange(rank, desc(probability)) %>% | |
mutate(abbrev = fct_rev(as_factor(as.character(abbrev)))) %>% | |
ggplot(aes(x = rank, y = abbrev, fill = probability)) + | |
geom_tile(color = "black") + | |
geom_text(aes(label = lbl)) + | |
scale_fill_viridis_c() + | |
scale_x_continuous(breaks = 1:10) + | |
labs( | |
title = "Probability of Final Rank for Each Team", | |
x = "Final Rank", | |
y = "Team", | |
fill = "Probability" | |
) + | |
theme_minimal() + | |
coord_equal() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment