Last active
May 16, 2021 22:38
-
-
Save friscojosh/78bcfce99bd976ec91e8b4840f99eacd to your computer and use it in GitHub Desktop.
Defense doesn't matter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################### | |
### Testing whether YPA allowed and | |
### AYA allowed by a defense matters when | |
### projecting an offense's YPA or AYA | |
### coming into a particular week. | |
########################################### | |
### SQL used to generate the data from nfldb by burntsushi | |
### nflplay, a dependency is maintained seperately | |
# DROP TABLE IF EXISTS __defense_study; | |
# CREATE TABLE __defense_study AS | |
# SELECT a.gsis_id, | |
# c.season_year as season, | |
# c.week, a.passing_att, | |
# a.passing_cmp_air_yds, | |
# a.passing_incmp_air_yds, | |
# a.receiving_yac_yds, | |
# a.receiving_yds, | |
# a.receiving_rec, | |
# a.receiving_tds, | |
# a.rushing_yds, | |
# a.rushing_att, | |
# a.rushing_tds, | |
# | |
# CASE | |
# WHEN b.pos_team = c.home_team THEN c.away_team | |
# ELSE c.home_team | |
# END AS defense, | |
# CASE | |
# WHEN b.pos_team = c.home_team THEN c.home_team | |
# ELSE c.away_team | |
# END AS opponent | |
# FROM agg_play a | |
# INNER JOIN drive b ON a.drive_id = b.drive_id AND a.gsis_id = b.gsis_id | |
# INNER JOIN game c ON a.gsis_id = c.gsis_id | |
# WHERE c.season_type = 'Regular'; | |
library(tidyverse) | |
library(broom) | |
library(knitr) | |
### Read in the data - available here: https://www.dropbox.com/s/y2nmzg5kjmh2kmi/__defense_study.csv?dl=0 | |
defense <- read_csv("data/__defense_study.csv") | |
### We'll need this later | |
results <- data.frame() | |
### A for loop to own the libs | |
for (week_to_split in 3:16) { | |
print(paste("Testing week", week_to_split)) | |
defense_no_2018 <- defense %>% | |
filter(season != 2018) | |
first_half_def <- defense_no_2018 %>% | |
filter(week <= week_to_split) %>% | |
filter(passing_att == 1) %>% | |
mutate(airyards = passing_cmp_air_yds + passing_incmp_air_yds, | |
pacr = ifelse(airyards == 0, receiving_yds, receiving_yds / airyards), | |
aya = round((receiving_yds + 20 * receiving_tds) / passing_att, 2)) %>% | |
group_by(defense, season) %>% | |
summarize(ypa = mean(receiving_yds), | |
aya = mean(aya)) | |
first_half_off <- defense_no_2018 %>% | |
filter(week <= week_to_split) %>% | |
filter(passing_att == 1) %>% | |
mutate(airyards = passing_cmp_air_yds + passing_incmp_air_yds, | |
pacr = ifelse(airyards == 0, receiving_yds, receiving_yds / airyards), | |
aya = round((receiving_yds + 20 * receiving_tds) / passing_att, 2)) %>% | |
group_by(opponent, season) %>% | |
summarize(ypa = mean(receiving_yds), | |
aya = mean(aya)) | |
week_to_test <- defense_no_2018 %>% | |
filter(week == week_to_split + 1) %>% | |
filter(passing_att == 1) %>% | |
mutate(airyards = passing_cmp_air_yds + passing_incmp_air_yds, | |
pacr = ifelse(airyards == 0, receiving_yds, receiving_yds / airyards), | |
aya = round((receiving_yds + 20 * receiving_tds) / passing_att, 2)) %>% | |
group_by(opponent, defense, season) %>% | |
summarize(ypa = mean(receiving_yds), | |
aya = mean(aya)) %>% | |
ungroup() | |
colnames(week_to_test) <- c("offense", "defense", "season", "test_ypa", "test_aya") | |
colnames(first_half_off) <- c("offense", "season", "off_ypa", "off_aya") | |
colnames(first_half_def) <- c("defense", "season", "def_ypa", "def_aya") | |
first_half_on_week <- first_half_off %>% | |
left_join(week_to_test, by = c("offense", "season")) %>% | |
na.omit() %>% | |
left_join(first_half_def, by = c("defense", "season")) | |
model_ypa_and_ypa_allowed <- lm(data = first_half_on_week, test_aya ~ off_aya + def_aya) | |
model_ypa_only <- lm(data = first_half_on_week, test_aya ~ off_aya) | |
model_ypa_allowed_only <- lm(data = first_half_on_week, test_aya ~ def_aya) | |
### glance() is a cool helper function that tidys up model summary info | |
both_res <- glance(model_ypa_and_ypa_allowed) %>% | |
mutate(model = "both", | |
week = week_to_split) | |
offense_res <- glance(model_ypa_only) %>% | |
mutate(model = "offense", | |
week = week_to_split) | |
defense_res <- glance(model_ypa_allowed_only) %>% | |
mutate(model = "defense", | |
week = week_to_split) | |
### bind it all up with twine | |
grouped <- both_res %>% | |
bind_rows(offense_res) %>% | |
bind_rows(defense_res) | |
### stuff it in our dataframe we created earlier | |
results <- rbind(results, grouped) | |
} | |
### plot the data for great visulaization | |
results %>% ggplot(aes(x = week, y = adj.r.squared, group = model, color = model)) + | |
geom_smooth(span = .3, se = FALSE)+ | |
geom_point() + | |
theme_minimal() + | |
labs(title = "Adjusted r-squared of Offensive AYA, Defensive AYA and Both as Predictors of Week N AYA") | |
### munge the data for great clarity | |
results %>% | |
spread(key = model, value = adj.r.squared) %>% | |
select(week, both, defense, offense, p.value) %>% | |
mutate(both = ifelse(is.na(both), 0, both), | |
offense = ifelse(is.na(offense), 0, offense), | |
defense = ifelse(is.na(defense), 0, defense)) %>% | |
group_by(week) %>% | |
summarize(both_adj_r_sq = round(max(both), 3), | |
offense_adj_r_sq = round(max(offense), 3), | |
defense_adj_r_sq = round(max(defense), 3)) %>% | |
kable() | |
### what percentage of the week are each predictor variable significant at 0.05? | |
results %>% | |
mutate(significant = ifelse(p.value <= 0.05, 1, 0)) %>% | |
group_by(model) %>% | |
summarize(`significant?` = round(mean(significant), 2)) %>% | |
kable() |
I think I mention it in the tweet where I posted this, but it was deliberate. I wanted to filter out interceptions and just look at positive passing performance. INTs are very noisy and rare, and I wanted to give defense the best shot for stability possible.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey Josh -- I'm curious why you calculated AYA (lines 62, 72, 82) without including interceptions. I thought the calculation for AYA was (Passing Yards + (20 * Passing TDs) - (45 * Interceptions))/(Passing Attempts).