Skip to content

Instantly share code, notes, and snippets.

@meghall06
Last active June 30, 2022 06:25
Show Gist options
  • Save meghall06/e125bc9e99446b4a09c7598cd47cb4a3 to your computer and use it in GitHub Desktop.
Save meghall06/e125bc9e99446b4a09c7598cd47cb4a3 to your computer and use it in GitHub Desktop.
# to create the plot in this tweet: https://twitter.com/MeghanMHall/status/1518397321150664705
library(tidyverse)
library(hockeyR)
library(glue)
library(ggtext)
library(showtext)
font_add_google(name = "Jost", family = "jost")
showtext_auto()
# initial data prep----
pbp_raw <- load_pbp("2021-22")
pbp <- pbp_raw %>%
mutate(event_length = ifelse(lead(game_id) == game_id,
lead(game_seconds) - game_seconds, 0)) %>%
rename(home_team = home_abbreviation,
away_team = away_abbreviation)
# I like the NHL rosters better than the hockey-ref ones
game_ids <- pbp %>%
select(game_id) %>%
mutate(game_id = as.character(game_id)) %>%
unique()
roster_fn <- function(id) {
hockeyR::get_game_rosters(jsonlite::read_json(glue::glue("http://statsapi.web.nhl.com/api/v1/game/{id}/feed/live")))
}
rosters <- purrr::map_df(game_ids$game_id, roster_fn) %>%
unique()
positions <- rosters %>%
mutate(player = str_replace_all(player_name, "'", " "),
player = str_replace_all(player, "\\.", " "),
player = str_replace_all(player, "-", " "),
player = ifelse(player_id == 8480222, "Sebastian AhoD",
player)) %>%
select(player, position_type) %>%
unique()
# fix the Marc Andre Fleury problem----
# for some reason MAF counts as a skater, not a goalie, for the on-ice variables
# and those drive skater counts and those drive strength state...
# so CHI is never depicted with the correct strength state ¯\_(ツ)_/¯
# also happens to Ukko Pekka Luukkonen and Cam Talbot
# noticed this pretty early on, that a few goalies were "on ice"
# wouldn't be a big deal, except for it affects strength state...
# can see that CHI only spends 38% of the time @ 5v5, which is def wrong
goalie_issue <- pbp %>%
select(home_team, away_team, home_skaters, away_skaters, event_length) %>%
filter(event_length > 0) %>%
pivot_longer(home_team:away_skaters,
names_to = c(NA, ".value"),
names_sep = 5) %>%
group_by(team, skaters) %>%
summarize(time = sum(event_length)) %>%
add_tally(wt = time) %>%
mutate(perc = round(time / n * 100, 2)) %>%
filter(skaters == 5) %>%
arrange(perc)
pbp <- pbp %>%
mutate(across(contains("_on_"), ~ ifelse(.x %in% c("Marc.Andre.Fleury",
"Ukko.Pekka.Luukkonen",
"Cam.Talbot",
"J.F.Berube"), NA, .x))) %>%
# why does this calc only work if it's in a separate mutate function...
mutate(home_skaters = rowSums(!is.na(select(., contains("home_on")))),
away_skaters = rowSums(!is.na(select(., contains("away_on")))),
home_v_away = str_c(home_skaters, "v", away_skaters))
# players by game----
# to have a record of which players played in which game
player_by_game <- pbp %>%
select(game_id, home_team, home_on_1:home_on_7,
away_team, away_on_1:away_on_7) %>%
rename_with(~str_replace(., "_on", ""), .cols = contains("on")) %>%
pivot_longer(home_team:away_7,
names_to = c("home_away", ".value"),
names_pattern = "(.+)_(.+)",
values_drop_na = TRUE) %>%
pivot_longer(`1`:`7`,
names_to = NULL,
values_to = "player",
values_drop_na = TRUE) %>%
unique() %>%
mutate(player = str_replace_all(player, "\\.", " ")) %>%
group_by(player, team) %>%
mutate(games = n_distinct(game_id))
# 5v5----
# trailing OZ faceoffs only in the third
five_v_five <- pbp %>%
filter(home_v_away == "5v5" & home_score != away_score &
event_type == "FACEOFF" & period == 3) %>%
mutate(trailing_OZ = case_when(home_score < away_score & x_fixed == 69 ~ 1,
away_score < home_score & x_fixed == -69 ~ 1,
TRUE ~ 0)) %>%
filter(trailing_OZ == 1) %>%
select(home_score, home_team, away_score, away_team, home_on_1:home_on_5,
away_on_1:away_on_5, trailing_OZ, game_id) %>%
mutate(trailing_team = ifelse(home_score < away_score, home_team, away_team),
trailing_amt = abs(home_score - away_score),
trailing1 = ifelse(home_score < away_score, home_on_1, away_on_1),
trailing2 = ifelse(home_score < away_score, home_on_2, away_on_2),
trailing3 = ifelse(home_score < away_score, home_on_3, away_on_3),
trailing4 = ifelse(home_score < away_score, home_on_4, away_on_4),
trailing5 = ifelse(home_score < away_score, home_on_5, away_on_5)) %>%
select(trailing_team:trailing5, game_id) %>%
group_by(trailing_team, game_id) %>%
mutate(no = row_number())
# count of those faceoffs per team per game
game_sums_ozf <- five_v_five %>%
group_by(trailing_team, game_id) %>%
summarize(ozf_team = max(no))
# count of those faceoffs per player per game
player_sums_ozf <- five_v_five %>%
pivot_longer(trailing1:trailing5, names_to = NULL, values_to = "player") %>%
count(trailing_team, player, game_id, name = "ozf") %>%
mutate(player = str_replace_all(player, "\\.", " "),
player = ifelse(player == "Sebastian Aho" & trailing_team == "NYI",
"Sebastian AhoD", player))
# start with the original players list, add in the player sums as well as the
# game sums
ozf <- player_by_game %>%
left_join(player_sums_ozf, by = c("player" = "player",
"team" = "trailing_team",
"game_id" = "game_id")) %>%
left_join(select(positions, player, position_type), by = "player") %>%
left_join(game_sums_ozf, by = c("team" = "trailing_team",
"game_id" = "game_id")) %>%
mutate(ozf = replace_na(ozf, 0),
ozf_team = replace_na(ozf_team, 0)) %>%
group_by(player, position_type, team, games) %>%
summarize(ozf = sum(ozf),
ozf_team = sum(ozf_team)) %>%
mutate(ozf_perc = ozf / ozf_team)
# PK----
pk <- pbp %>%
filter(home_v_away %in% c("4v5","5v4")) %>%
mutate(pp_team = ifelse(away_skaters == 4, home_team, away_team),
pk_team = ifelse(away_skaters == 4, away_team, home_team),
pk_1 = ifelse(away_skaters == 4, away_on_1, home_on_1),
pk_2 = ifelse(away_skaters == 4, away_on_2, home_on_2),
pk_3 = ifelse(away_skaters == 4, away_on_3, home_on_3),
pk_4 = ifelse(away_skaters == 4, away_on_4, home_on_4),
shot_event = case_when(!(event_type %in% c("GOAL","SHOT","BLOCKED_SHOT",
"MISSED_SHOT")) ~ NA_character_,
event_team_abbr == pp_team ~ "PP",
event_team_abbr == pk_team ~ "PK")) %>%
filter(event_length > 0 | event_type %in% c("GOAL","SHOT","BLOCKED_SHOT",
"MISSED_SHOT","FACEOFF")) %>%
select(game_id, event_type, event_idx, event_team = event_team_abbr,
shot_event, pp_team:pk_4, event_length)
# sum of PK time per team per game
game_sums_pk <- pk %>%
group_by(pk_team, game_id) %>%
summarize(time_team = sum(event_length))
# sum of PK time per player per game
player_sums_pk <- pk %>%
pivot_longer(pk_1:pk_4, names_to = NULL, values_to = "player") %>%
group_by(pk_team, player, game_id) %>%
summarize(time = sum(event_length)) %>%
mutate(player = str_replace_all(player, "\\.", " "),
player = ifelse(player == "Sebastian Aho" & pk_team == "NYI",
"Sebastian AhoD", player))
# start with the original players list, add in the player sums as well as the
# game sums
pk_time <- player_by_game %>%
left_join(player_sums_pk, by = c("player" = "player",
"team" = "pk_team",
"game_id" = "game_id")) %>%
left_join(select(positions, player, position_type), by = "player") %>%
left_join(game_sums_pk, by = c("team" = "pk_team",
"game_id" = "game_id")) %>%
mutate(time = replace_na(time, 0),
time_team = replace_na(time_team, 0)) %>%
group_by(player, position_type, team, games) %>%
summarize(time = sum(time),
time_team = sum(time_team)) %>%
mutate(time_perc = time / time_team)
# initial plots----
plot <- ozf %>%
left_join(pk_time, by = c("player","position_type","team","games")) %>%
filter(games > 20 & !is.na(position_type))
names <- hockeyR::team_logos_colors
focus_team <- plot %>%
filter(team %in% c("TOR","CAR","CBJ","STL")) %>%
mutate(label = case_when(ozf_perc > 0.4 & time_perc > 0.39 ~ player,
TRUE ~ NA_character_)) %>%
left_join(select(names, team = team_abbr, full_team_name), by = "team")
gray <- plot %>%
ungroup() %>%
select(-team)
focus_team %>%
ggplot(aes(x = ozf_perc, y = time_perc)) +
geom_point(data = gray, color = "grey", alpha = 0.5) +
geom_point(size = 2.5, color = "#172A3A") +
facet_wrap(~full_team_name) +
geom_text(size = 4, mapping = aes(label = label), family = "jost",
hjust = -0.1) +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(labels = scales::percent) +
labs(title = "Personnel on the penalty kill, 2021-22 NHL season through April 7",
y = "% of time on the PK",
x = "% of 5v5 high-leverage offensive faceoffs",
subtitle = "@MeghanMHall, data from hockeyR") +
guides(color = "none") +
theme_bw() +
theme(text = element_text(family = "jost"),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", size = 0.3),
strip.background = element_blank(),
strip.text = element_textbox(size = 11, color = "white", fill = "#172A3A",
halign = 0.5, linetype = 1, r = unit(5, "pt"), width = unit(1, "npc"),
padding = margin(3, 0, 3, 0), margin = margin(0, 2, 0, 2)),
panel.border = element_rect(color = "black", fill = NA, size = 0.5),
axis.ticks = element_blank(),
plot.caption.position = "plot",
plot.caption = element_text(hjust = 0, vjust = -1),
plot.subtitle = element_text(face = 3, size = 9))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment