Skip to content

Instantly share code, notes, and snippets.

@Ryo-N7
Last active December 1, 2019 09:56
Show Gist options
  • Save Ryo-N7/622fabee1cde074c1a1d8f2698bf56df to your computer and use it in GitHub Desktop.
Save Ryo-N7/622fabee1cde074c1a1d8f2698bf56df to your computer and use it in GitHub Desktop.
Visualize the EPL, Part 2
####
# Will keep updating this so if everything isn't there yet please be patient!
####
## Packages
pacman::p_load(tidyverse, polite, scales, ggimage, ggforce,
understatr, cowplot, kableExtra, ggbeeswarm,
jsonlite, xml2, qdapRegex, stringi, stringr,
rvest, glue, extrafont, ggrepel, magick, ggtext)
loadfonts(quiet = TRUE)
## Data
## Load in data from: soccer_ggplots/data/EPL_shots_data_df_raw.RDS
## Read in directly from the URL:
EPL_shots_data_df_raw <- readRDS(url("https://raw.githubusercontent.com/Ryo-N7/soccer_ggplots/master/data/EPL_shots_data_df_raw.RDS"))
## Colors
fillvals <- c("Liverpool" = "red",
"Tottenham" = "#132257",
"Manchester City" = "#6CABDD",
"Chelsea" = "blue", #"#034694",
"Arsenal" = "green", #"#9C824A",
"Manchester United" = "orange",
"Leicester" = "#b15928",
"Sheffield United" = "pink",
"Bournemouth" = "purple",
"Brighton" = "white",
"Other" = "grey")
## CLEANED data
EPL_situation_df <- EPL_shots_data_df_raw %>%
## pick out 3rd data frame which is game states
map(1) %>%
bind_rows(.id = "team_name")
EPL_situation_df_clean <- EPL_situation_df %>%
mutate(name = str_replace(name, "against.", "against_"),
team_name = str_replace(team_name, "_", " "),
team_name = as_factor(team_name)) %>%
separate(name, c("situation", "metric"),
sep = "\\.", extra = "merge") %>%
filter(metric != "stat") %>%
pivot_wider(names_from = "metric", values_from = "value") %>%
## create some more metrics
mutate(xGperShot = xG / shots,
xGAperShot = against_xG / against_shots) %>%
pivot_longer(-c("team_name", "situation"),
names_to = "metric", values_to = "value") %>%
pivot_wider(names_from = "situation", values_from = "value")
EPL_situation_percentile_df <- EPL_situation_df %>%
mutate(name = str_replace(name, "against.", "against_"),
team_name = str_replace(team_name, "_", " ")) %>%
separate(name, c("situation", "metric"),
sep = "\\.", extra = "merge") %>%
filter(metric != "stat") %>%
pivot_wider(names_from = "metric", values_from = "value") %>%
## create some more metrics
mutate(xGperShot = xG / shots,
xGAperShot = against_xG / against_shots) %>%
## normalize
group_by(situation) %>%
mutate_at(vars(c("against_shots", "against_goals",
"against_xG", "xGAperShot")),
~ rescale(., to = c(100, 0))) %>%
mutate_at(vars(c("shots", "goals", "xG", "xGperShot")),
~ rescale(., to = c(0, 100))) %>%
ungroup() %>%
## pivot back
pivot_longer(-c("team_name", "situation"),
names_to = "metric", values_to = "value") %>%
## percentile rank
group_by(metric, situation) %>%
mutate(percrank = rank(value) / length(value)) %>%
ungroup() %>%
select(-value) %>%
## pivot back
pivot_wider(names_from = "situation", values_from = "percrank") %>%
mutate(metric = case_when(
metric == "against_shots" ~ "Shots Against",
metric == "shots" ~ "Shots",
metric == "against_xG" ~ "xGA",
metric == "against_goals" ~ "Goals Against",
metric == "goals" ~ "Goals",
metric == "xG" ~ "xG",
metric == "xGperShot" ~ "xG per Shot",
metric == "xGAperShot" ~ "xGA per Shot",
TRUE ~ metric
)) %>%
## resort metric order
mutate(metric = as_factor(metric)) %>%
mutate(metric = fct_relevel(metric, "Shots Against", "Shots",
"Goals Against", "xGA",
"Goals", "xG"))
## Open Play: Attack
EPL_situation_percentile_df %>%
## team colors
mutate(team_name = case_when(
!team_name %in% c("Liverpool", "Tottenham", "Manchester City",
"Chelsea", "Arsenal", "Manchester United",
"Leicester", "Brighton") ~ "Other",
TRUE ~ team_name
)) %>%
mutate(team_name = as_factor(team_name)) %>%
filter(metric %in% c("Shots", "Goals", "xG", "xG per Shot")) %>%
ggplot(aes(x = metric)) +
geom_point(aes(y = OpenPlay, fill = team_name),
size = 6, stroke = 1.5,
color = "black", shape = 21,
position = position_jitterdodge(jitter.width = 0.6,
dodge.width = 0.8,
seed = 1892)) +
geom_vline(xintercept = 0.5) +
geom_vline(xintercept = 1.5) +
geom_vline(xintercept = 2.5) +
geom_vline(xintercept = 3.5) +
geom_vline(xintercept = 4.5) +
scale_fill_manual(values = fillvals, name = "Team") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1),
breaks = c(0, 0.25, 0.5, 0.75, 1),
limits = c(0, 1)) +
labs(title = "Offensive Stats from Open Play",
subtitle = glue("
Percentile Rank
As of Nov. 10, 2019 (Matchday 12)"),
caption = glue("
Data: understat.com
Twitter: @R_by_Ryo"),
x = NULL,
y = "Percentile Rank") +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed"),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 18),
plot.caption = element_text(size = 12),
axis.title = element_text(size = 16),
axis.text = element_text(size = 14, color = "black"),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14))
## Open Play: Defense
EPL_situation_percentile_df %>%
## team colors
mutate(team_name = case_when(
!team_name %in% c("Liverpool", "Tottenham", "Manchester City",
"Chelsea", "Arsenal", "Manchester United",
"Leicester", "Brighton") ~ "Other",
TRUE ~ team_name
)) %>%
mutate(team_name = as_factor(team_name)) %>%
filter(metric %in% c("Shots Against", "Goals Against",
"xGA", "xGA per Shot")) %>%
ggplot(aes(x = metric)) +
geom_point(aes(y = OpenPlay, fill = team_name),
size = 6, stroke = 1.5,
color = "black", shape = 21,
position = position_jitterdodge(jitter.width = 0.6,
dodge.width = 0.8,
seed = 1892)) +
geom_vline(xintercept = 0.5) +
geom_vline(xintercept = 1.5) +
geom_vline(xintercept = 2.5) +
geom_vline(xintercept = 3.5) +
geom_vline(xintercept = 4.5) +
scale_fill_manual(values = fillvals, name = "Team") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1),
breaks = c(0, 0.25, 0.5, 0.75, 1),
limits = c(0, 1)) +
labs(title = "Defensive Stats from Open Play",
subtitle = glue("
Percentile Rank
As of Nov. 10, 2019 (Matchday 12)"),
caption = glue("
Data: understat.com
Twitter: @R_by_Ryo"),
x = NULL,
y = "Percentile Rank") +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed"),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 18),
plot.caption = element_text(size = 12),
axis.title = element_text(size = 16),
axis.text = element_text(size = 14, color = "black"),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14))
## Setpiece ALL (Indirect free kick + corners)
EPL_situation_SetPieceAll_df <- EPL_situation_df_clean %>%
group_by(team_name, metric) %>%
mutate(SetPieceAll = FromCorner + SetPiece) %>%
ungroup() %>%
select(team_name, metric, SetPieceAll) %>%
pivot_wider(names_from = "metric", values_from = "SetPieceAll") %>%
## recalculate per shot values
group_by(team_name) %>%
mutate(xGperShot = xG / shots,
xGAperShot = against_xG / against_shots) %>%
ungroup() %>%
## back to other
mutate_at(vars(c("against_shots", "against_goals",
"against_xG", "xGAperShot")),
~ rescale(., to = c(100, 0))) %>%
mutate_at(vars(c("shots", "goals", "xG", "xGperShot")),
~ rescale(., to = c(0, 100))) %>%
## pivot back
pivot_longer(-c("team_name"),
names_to = "metric", values_to = "value") %>%
## percentile rank
group_by(metric) %>%
mutate(percrank = rank(value) / length(value)) %>%
ungroup() %>%
select(-value) %>%
## resort metric order
mutate(metric = case_when(
metric == "against_shots" ~ "Shots Against",
metric == "shots" ~ "Shots",
metric == "against_xG" ~ "xGA",
metric == "against_goals" ~ "Goals Against",
metric == "goals" ~ "Goals",
metric == "xG" ~ "xG",
metric == "xGperShot" ~ "xG per Shot",
metric == "xGAperShot" ~ "xGA per Shot",
TRUE ~ metric
)) %>%
## resort metric order
mutate(metric = as_factor(metric)) %>%
mutate(metric = fct_relevel(metric, "Shots Against", "Shots",
"Goals Against", "xGA",
"Goals", "xG"))
## Set piece: Attack
EPL_situation_SetPieceAll_df %>%
## team colors
mutate(team_name = as.character(team_name)) %>%
mutate(team_name = case_when(
!team_name %in% c("Liverpool", "Tottenham", "Manchester City",
"Chelsea", "Arsenal", "Manchester United",
"Leicester", "Bournemouth") ~ "Other",
TRUE ~ team_name
)) %>%
mutate(team_name = as_factor(team_name)) %>%
filter(metric %in% c("Shots", "Goals", "xG", "xG per Shot")) %>%
ggplot(aes(x = metric)) +
geom_point(aes(y = percrank, fill = team_name),
size = 6, stroke = 1.5,
color = "black", shape = 21,
position = position_jitterdodge(jitter.width = 0.6,
dodge.width = 0.8,
seed = 1892)) +
geom_vline(xintercept = 0.5) +
geom_vline(xintercept = 1.5) +
geom_vline(xintercept = 2.5) +
geom_vline(xintercept = 3.5) +
geom_vline(xintercept = 4.5) +
scale_fill_manual(values = fillvals, name = "Team") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1),
breaks = c(0, 0.25, 0.5, 0.75, 1),
limits = c(0, 1)) +
labs(title = "Offensive Stats from Corners & Set Pieces",
subtitle = glue("
Percentile Rank | Excluding Direct Free Kicks
As of Nov. 10, 2019 (Matchday 12)"),
caption = glue("
Data: understat.com
Twitter: @R_by_Ryo"),
x = NULL,
y = "Percentile Rank") +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed"),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 18),
plot.caption = element_text(size = 12),
axis.title = element_text(size = 16),
axis.text = element_text(size = 14, color = "black"),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14))
## Set piece: Defense
EPL_situation_SetPieceAll_df %>%
mutate(team_name = as.character(team_name)) %>%
## team colors
mutate(team_name = case_when(
!team_name %in% c("Liverpool", "Tottenham", "Manchester City",
"Chelsea", "Arsenal", "Manchester United",
"Leicester", "Bournemouth") ~ "Other",
TRUE ~ team_name
)) %>%
mutate(team_name = as_factor(team_name)) %>%
filter(metric %in% c("Shots Against", "Goals Against",
"xGA", "xGA per Shot")) %>%
ggplot(aes(x = metric)) +
geom_point(aes(y = percrank, fill = team_name),
size = 6, stroke = 1.5,
color = "black", shape = 21,
position = position_jitterdodge(jitter.width = 0.6,
dodge.width = 0.8,
seed = 1892)) +
geom_vline(xintercept = 0.5) +
geom_vline(xintercept = 1.5) +
geom_vline(xintercept = 2.5) +
geom_vline(xintercept = 3.5) +
geom_vline(xintercept = 4.5) +
scale_fill_manual(values = fillvals, name = "Team") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1),
breaks = c(0, 0.25, 0.5, 0.75, 1),
limits = c(0, 1)) +
labs(title = "Defensive Stats from Corners & Set Pieces",
subtitle = glue("
Percentile Rank | Excluding Direct Free Kicks
As of Nov. 10, 2019 (Matchday 12)"),
caption = glue("
Data: understat.com
Twitter: @R_by_Ryo"),
x = NULL,
y = "Percentile Rank") +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed"),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 18),
plot.caption = element_text(size = 12),
axis.title = element_text(size = 16),
axis.text = element_text(size = 14, color = "black"),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14))
## Corners
## Corners: Attack
EPL_situation_percentile_df %>%
## team colors
mutate(team_name = case_when(
!team_name %in% c("Liverpool", "Tottenham", "Manchester City",
"Chelsea", "Arsenal", "Manchester United",
"Leicester") ~ "Other",
TRUE ~ team_name
)) %>%
mutate(team_name = as_factor(team_name)) %>%
filter(metric %in% c("Shots", "Goals", "xG", "xG per Shot")) %>%
ggplot(aes(x = metric)) +
geom_point(aes(y = FromCorner, fill = team_name),
size = 6, stroke = 1.5,
color = "black", shape = 21,
position = position_jitterdodge(jitter.width = 0.6,
dodge.width = 0.8,
seed = 1892)) +
geom_vline(xintercept = 0.5) +
geom_vline(xintercept = 1.5) +
geom_vline(xintercept = 2.5) +
geom_vline(xintercept = 3.5) +
geom_vline(xintercept = 4.5) +
scale_fill_manual(values = fillvals, name = "Team") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1),
breaks = c(0, 0.25, 0.5, 0.75, 1),
limits = c(0, 1)) +
labs(title = "Offensive Stats from Corner Kicks",
subtitle = glue("
Percentile Rank
As of Nov. 10, 2019 (Matchday 12)"),
caption = glue("
Data: understat.com
Twitter: @R_by_Ryo"),
x = NULL,
y = "Percentile Rank") +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed"),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 18),
plot.caption = element_text(size = 12),
axis.title = element_text(size = 16),
axis.text = element_text(size = 14, color = "black"),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14))
## Corners: Defense
EPL_situation_percentile_df %>%
## team colors
# mutate(team_name = case_when(
# !team_name %in% c("Liverpool", "Tottenham", "Manchester City",
# "Chelsea", "Arsenal", "Manchester United",
# "Leicester", "Brighton") ~ "Other",
# TRUE ~ team_name
# )) %>%
mutate(team_name = as_factor(team_name)) %>%
filter(metric %in% c("shots", "goals", "xG", "xGperShot")) %>%
ggplot(aes(x = metric)) +
geom_point(aes(y = FromCorner, fill = team_name),
size = 6, stroke = 1.5,
color = "black", shape = 21,
position = position_jitterdodge(jitter.width = 0.6,
dodge.width = 0.8,
seed = 1892)) +
geom_vline(xintercept = 0.5) +
geom_vline(xintercept = 1.5) +
geom_vline(xintercept = 2.5) +
geom_vline(xintercept = 3.5) +
geom_vline(xintercept = 4.5) +
# scale_fill_manual(values = fillvals, name = "Team") +
scale_fill_manual(values = pals::glasbey(20), name = "Team") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1),
breaks = c(0, 0.25, 0.5, 0.75, 1),
limits = c(0, 1)) +
labs(title = "Offensive Stats from Corner Kicks",
subtitle = glue("
Percentile Rank
As of Nov. 10, 2019 (Matchday 12)"),
caption = glue("
Data: understat.com
Twitter: @R_by_Ryo"),
x = NULL,
y = "Percentile Rank") +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed"),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 18),
plot.caption = element_text(size = 12),
axis.title = element_text(size = 16),
axis.text = element_text(size = 14, color = "black"),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14))
## % of xG from situations
EPL_situation_OpenPlay_And_All_df <- EPL_situation_df_clean %>%
mutate(Penalty = if_else(is.na(Penalty), 0, Penalty)) %>%
group_by(team_name, metric) %>%
mutate(all = sum(OpenPlay + DirectFreekick + FromCorner, SetPiece),
openplay_perc = OpenPlay / all,
corner_perc = FromCorner / all,
half_corner = corner_perc / 2,
half = openplay_perc / 2) %>%
ungroup() %>%
select(team_name, metric, openplay_perc, half,
corner_perc, half_corner)
## % of xG from corner kicks
EPL_situation_OpenPlay_And_All_df %>%
filter(metric == "xG") %>%
ggplot(aes(x = fct_reorder(team_name, corner_perc),
y = corner_perc)) +
geom_col(color = "black", fill = "#00ff85") +
geom_text(aes(y = half_corner,
label = percent_format(accuracy = 0.1) (corner_perc)),
color = "#38003c", size = 5.5, fontface = "bold",
family = "Roboto Condensed") +
scale_y_continuous(expand = c(0.01, 0),
labels = scales::percent_format(accuracy = 2)) +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed",
color = "white"),
title = element_text(size = 18),
axis.text = element_text(color = "white", size = 14),
plot.background = element_rect(fill = "#38003c")) +
labs(title = "Percentage of Total non-Penalty xG from Corner Kicks",
subtitle = "As of Nov. 10, 2019 (Matchday 12)",
x = NULL, y = "Percentage of Total non-Penalty xG from Corner Kicks",
caption = glue("Twitter: @R_by_Ryo Data: understat.com")) +
coord_flip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment