Last active
March 18, 2021 01:17
-
-
Save benjaminrobinson/b8f796433a79908897c03178f545d73f to your computer and use it in GitHub Desktop.
Exploring the Relationship Between Regular Season and Postseason Success
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(gghighlight) | |
library(ggthemes) | |
# Load the game log and label each game with its season type, winner and loser.
#
# Result `tmp` has one row per game with the selected columns plus:
#   season_type  "Regular" when game_type is "REG", otherwise "Post"
#   winner       code of the higher-scoring team, or "Tie" on equal scores
#   loser        code of the lower-scoring team, or "Tie" on equal scores
tmp <- read_csv(
  "https://github.com/leesharpe/nfldata/raw/master/data/games.csv"
) %>%
  select(
    season,
    game_type,
    game_id,
    home_score,
    away_score,
    home_team,
    away_team
  ) %>%
  # A row with a missing score has no result: comparing NA scores yields NA,
  # which would leave `winner` as NA and let the row be counted downstream as
  # a game played. Keep only rows where both scores are present.
  # NOTE(review): presumably these are scheduled-but-unplayed games; confirm
  # against the feed.
  filter(!is.na(home_score), !is.na(away_score)) %>%
  mutate(
    season_type = if_else(game_type == "REG", "Regular", "Post"),
    winner = case_when(
      home_score > away_score ~ home_team,
      away_score > home_score ~ away_team,
      TRUE ~ "Tie"
    ),
    loser = case_when(
      home_score > away_score ~ away_team,
      away_score > home_score ~ home_team,
      TRUE ~ "Tie"
    )
  )
# Map older team abbreviations onto the single code used for that franchise in
# this analysis, so every franchise is aggregated under one label.
# Values that match no rule (including "Tie" and NA) pass through unchanged.
normalize_team <- function(code) {
  case_when(
    code %in% c("LA", "STL") ~ "LAR",
    code == "SD" ~ "LAC",
    code == "OAK" ~ "LV",
    code == "WAS" ~ "WFT",
    TRUE ~ code
  )
}

# Build `dat`: one row per team with its winning percentage in the
# postseason (`Post`) and the regular season (`Regular`).
#
# Team codes are normalized on the home team, the away team AND the winner
# before they are compared. Normalizing only the team column makes
# `team == winner` FALSE for every win by a remapped franchise (e.g. team
# "LV" vs winner "OAK"), which silently scores those wins as losses.
dat <- tmp %>%
  mutate(across(c(home_team, away_team, winner), normalize_team)) %>%
  # One row per (game, participant): each game contributes its home and its
  # away team.
  pivot_longer(
    cols = c(home_team, away_team),
    names_to = "venue",
    values_to = "team"
  ) %>%
  # A tie has winner "Tie", which never equals a team code, so it counts as a
  # game played but not as a win.
  mutate(won = team == winner) %>%
  group_by(season_type, team) %>%
  summarize(
    win_perc = sum(won, na.rm = TRUE) / n(),
    .groups = "drop"
  ) %>%
  # Spread the two season types into side-by-side columns.
  pivot_wider(names_from = season_type, values_from = win_perc)
# Scatter plot of regular-season vs postseason winning percentage, one point
# per team, with a dashed red y = x reference line and the CIN point
# highlighted and labelled. Both axes run from 0% to 100%.
ggplot(data = dat, mapping = aes(x = Regular, y = Post, group = team)) +
  geom_point(size = 3) +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = 2) +
  scale_x_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  theme_fivethirtyeight() +
  # Added after the layers above so that it applies to all of them.
  gghighlight(team == "CIN", use_direct_label = TRUE) +
  labs(
    title = "Testing a Hypothesis",
    subtitle = "Regular vs Postseason Success (1999 - 2020)",
    x = "Regular Season Winning Percentage",
    y = "Postseason Winning Percentage",
    caption = "Chart by: Benjamin Robinson (@benj_robinson) | Source: NFLGameData.com, @LeeSharpeNFL"
  )
# Save the plot as an 8 x 8 inch PNG at 96 dpi. No `plot` argument is given,
# so ggsave() falls back to its default plot.
ggsave(
  filename = "REG_POST_WINNING_PERCENTAGE_RELATIONSHIP.png",
  units = "in", width = 8, height = 8, dpi = 96
)
# Linear model of postseason winning percentage on regular-season winning
# percentage. The model is fitted once and reused for both the summary and
# the predictions.
fit <- lm(Post ~ Regular, data = dat)
summary(fit)

# lm() drops rows with a missing value by default; restrict the correlation
# to complete rows as well so both statistics describe the same teams
# (a single NA would otherwise make cor() return NA).
cor(dat$Regular, dat$Post, use = "complete.obs")

# Fitted postseason winning percentage for every team in `dat`.
dat$pred <- predict(fit, newdata = dat)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment