Last active
May 16, 2021 22:29
-
-
Save jthomasmock/7d489f04e53812eacca8b94b6c8ee84a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
library(tidyverse) | |
library(ggrepel) | |
library(glue) | |
library(lubridate) | |
library(gt) | |
library(ggforce) | |
url <- "https://www.pro-football-reference.com/teams/comebacks.htm" | |
list_of_teams <- c("atl", "buf", "car", "chi", "cin", "cle", "clt", "crd", "dal", "den", "det", | |
"gnb", "htx", "jax", "kan", "mia", "min", "nor", "nwe", "nyg", "nyj", | |
"oti", "phi", "pit", "rai", "ram", "rav", "sdg", 'sea', "sfo", "tam", "was") | |
scrape_comeback <- function(team){ | |
message(paste("Scraping", team)) | |
Sys.sleep(5) | |
url <- glue::glue("https://www.pro-football-reference.com/teams/{team}/comebacks.htm") | |
url %>% | |
read_html() %>% | |
html_table() %>% | |
.[[1]] %>% | |
janitor::clean_names() %>% | |
as_tibble() | |
} | |
chiefs_score <- tibble( | |
tm = "Kansas City Chiefs", | |
x = "", | |
opp = "Houston Texans", | |
date = "Jan 12, 2020", | |
pf = 51, | |
pa = 31, | |
deficit = 24, | |
margin = 20 | |
) | |
all_comebacks <- list_of_teams %>% | |
map_dfr(scrape_comeback) | |
plot_df <- all_comebacks %>% | |
mutate_at(vars(pf, pa, deficit), as.numeric) %>% | |
rowwise() %>% | |
mutate(margin = pf - pa) %>% | |
bind_rows(chiefs_score) %>% | |
mutate(swing = margin + deficit) %>% | |
mutate(date = str_remove(date, "\\*"), | |
date_iso = readr::parse_date(date, format = "%b %d, %Y"), | |
year = lubridate::year(date_iso)) %>% | |
arrange(desc(swing)) %>% | |
filter(year >= 1968) | |
margin_plot <- plot_df %>% | |
ggplot(aes(x = deficit, y= margin)) + | |
annotate('segment', | |
xend = 24, x = 24, y = 0, yend = 20, color = "red", size = 3, alpha = 0.2) + | |
geom_point(alpha = 0.5, aes(color = if_else(deficit >= 24 & margin >= 20, "red", "black")), | |
size = 3) + | |
# geom_text(x = 24, y = 20, label = "(24,20)",vjust = -1) + | |
geom_mark_circle(data = filter(plot_df, year == 2020, deficit == 24), | |
aes(label = paste(tm, 2020, sep = " - ")), | |
color = "red", label.colour = "red", con.colour = "red", | |
label.fill = NA) + | |
geom_hline(yintercept = 0) + | |
scale_color_identity() + | |
theme_minimal() + | |
labs(caption = "\nPlot: @thomas_mock | Data: ProFootballReference", | |
x = "\nLargest Deficit", | |
y = "Margin of Victory\n", | |
title = "KC's comeback from 24 points down was historic", | |
subtitle = "Teams with deficit of 24 points or more & margin of victory of 20 or more in red") + | |
theme(axis.title = element_text(face = "bold"), | |
panel.grid.minor = element_blank(), | |
plot.title = element_text(size = 20, face = "bold"), | |
plot.subtitle = element_text(size = 16), | |
axis.text = element_text(size = 16), | |
axis.title.x = element_text(size = 20), | |
axis.title.y = element_text(size = 20)) + | |
scale_x_continuous(breaks = seq(5, 30, 5)) + | |
scale_y_continuous(breaks = seq(0, 30, 5)) | |
margin_plot | |
ggsave("margin_plot.png", margin_plot, height = 10, width = 10, units = "in", dpi = "retina") | |
plot_df | |
gt_table <- plot_df %>% | |
filter(deficit >= 24) %>% | |
select(tm, opp, date_iso, pf:swing) %>% | |
gt::gt() %>% | |
cols_merge(vars(pf), | |
vars(pa), | |
pattern = "{1}-{2}") %>% | |
cols_label(tm = "Team", opp = "Opp", date_iso = "Date", | |
margin = "Margin", swing = "Swing", pf = "Score", deficit = "Deficit") %>% | |
data_color( | |
columns = vars(margin), | |
colors = scales::col_numeric( | |
palette = c("#F8F8F8","#30a2da"), | |
domain = NULL | |
) | |
) %>% | |
data_color( | |
columns = vars(swing), | |
colors = scales::col_numeric( | |
palette = c("#F8F8F8","#30a2da"), | |
domain = NULL | |
) | |
) %>% | |
tab_style( | |
style = cell_text( | |
weight = "bold" | |
), | |
locations = cells_data( | |
rows = tm == "Kansas City Chiefs" | |
) | |
) %>% | |
tab_header(title = "Kansas City's comeback was historic", | |
subtitle = "The largest swing and margin of victory ever for a deficit of 24 points or more") %>% | |
tab_source_note("Table: @thomas_mock | Data: Pro Football Reference") %>% | |
tab_footnote(footnote = "Margin = margin of victory", | |
locations = cells_column_labels( | |
columns = vars(margin) | |
) | |
) %>% | |
tab_footnote(footnote = "Deficit = largest deficit at any point in the game", | |
locations = cells_column_labels( | |
columns = vars(deficit) | |
) | |
) %>% | |
tab_footnote(footnote = "Swing = margin of victory + deficit", | |
locations = cells_column_labels( | |
columns = vars(swing) | |
) | |
) | |
gt_table | |
gtsave(gt_table, "kc_table.png") | |
kc_bar <- plot_df %>% | |
filter(deficit >= 24) %>% | |
mutate(tm_year = paste(tm, as.character(year), sep = "-"), | |
tm_year = fct_reorder(tm_year, swing)) %>% | |
select(tm_year, pf:swing) %>% | |
ggplot(aes(x = fct_reorder(tm_year, swing), y = swing)) + | |
geom_col(aes(fill = if_else(tm_year == "Kansas City Chiefs-2020", "red", "grey"))) + | |
geom_text(aes(label = margin), hjust = 1.5, color = "white", fontface = "bold", size = 6) + | |
geom_hline(yintercept = 0, size = 1, color = "black") + | |
scale_fill_identity() + | |
coord_flip() + | |
theme_minimal() + | |
theme(panel.grid.minor = element_blank(), | |
panel.grid.major.y = element_blank(), | |
axis.text = element_text(face = "bold", size = 14, color = "black"), | |
axis.title = element_text(size = 16), | |
plot.title = element_text(face = "bold", size = 20)) + | |
labs(x = "", | |
y = "\nPoint Swing", | |
title = "Kansas City's comeback from 24 points down was historic", | |
subtitle = "Point Swing = Deficit + Margin of Victory\nInset number = Margin of Victory", | |
caption = "\nPlot: @thomas_mock | Data: Pro Football Reference") | |
kc_bar | |
ggsave("kc_bar.png", kc_bar, height = 12, width = 14, units = "in", dpi = "retina") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment