Skip to content

Instantly share code, notes, and snippets.

@Ryo-N7
Last active May 6, 2020 07:01
Show Gist options
  • Save Ryo-N7/a4b8799328debf335a055d52260f8d03 to your computer and use it in GitHub Desktop.
Save Ryo-N7/a4b8799328debf335a055d52260f8d03 to your computer and use it in GitHub Desktop.
np-xG per 90 vs. xA per 90
# pkgs
```{r, message=FALSE, warning=FALSE}
pacman::p_load(tidyverse, polite, scales, ggimage,
ggforce, ggtext,
rvest, glue, extrafont, ggrepel, magick)
loadfonts()
```
## add_logo
```{r}
add_logo <- function(plot_path, logo_path, logo_position, logo_scale = 10){
# Requires magick R Package https://github.com/ropensci/magick
# Useful error message for logo position
if (!logo_position %in% c("top right", "top left", "bottom right", "bottom left")) {
stop("Error Message: Uh oh! Logo Position not recognized\n Try: logo_positon = 'top left', 'top right', 'bottom left', or 'bottom right'")
}
# read in raw images
plot <- magick::image_read(plot_path)
logo_raw <- magick::image_read(logo_path)
# get dimensions of plot for scaling
plot_height <- magick::image_info(plot)$height
plot_width <- magick::image_info(plot)$width
# default scale to 1/10th width of plot
# Can change with logo_scale
logo <- magick::image_scale(logo_raw, as.character(plot_width/logo_scale))
# Get width of logo
logo_width <- magick::image_info(logo)$width
logo_height <- magick::image_info(logo)$height
# Set position of logo
# Position starts at 0,0 at top left
# Using 0.01 for 1% - aesthetic padding
if (logo_position == "top right") {
x_pos = plot_width - logo_width - 0.01 * plot_width
y_pos = 0.01 * plot_height
} else if (logo_position == "top left") {
x_pos = 0.01 * plot_width
y_pos = 0.01 * plot_height
} else if (logo_position == "bottom right") {
x_pos = plot_width - logo_width - 0.01 * plot_width
y_pos = plot_height - logo_height - 0.01 * plot_height
} else if (logo_position == "bottom left") {
x_pos = 0.01 * plot_width
y_pos = plot_height - logo_height - 0.01 * plot_height
}
# Compose the actual overlay
magick::image_composite(plot, logo, offset = paste0("+", x_pos, "+", y_pos))
}
```
# Premier League data
## fbref
- https://fbref.com/en/comps/9/stats/Premier-League-Stats
- Save "Player Standard Stats" & "Squad Standard Stats" as a `.csv` file or whatever you prefer.
### squad stats
```{r}
epl_squad_stats_MD21_2_raw <- read_csv(here::here("data/epl_squad_stats_MD21_2.csv"),
skip = 1, col_names = TRUE)
epl_squad_stats_MD21_2 <- epl_squad_stats_MD21_2_raw %>%
rename_at(vars(12:16),
~ glue::glue("{colnames(epl_squad_stats_MD21_2_raw)[12:16]}_p90")) %>%
rename_at(vars(20:24),
~ glue::glue("{colnames(epl_squad_stats_MD21_2_raw)[20:24]}_p90"))
epl_squad_stats_MD21_2 <- epl_squad_stats_MD21_2 %>%
rename_at(vars(2:24), ~ glue::glue("{colnames(epl_squad_stats_MD21_2)[2:24]}_squad")) %>%
rename_at(vars(contains("_1")), ~ str_replace(., "_1", ""))
glimpse(epl_squad_stats_MD21_2)
```
### player stats
```{r}
epl_player_stats_MD20_raw <- read_csv(here::here("data/epl_player_stats_MD20.csv"),
skip = 1, col_names = TRUE)
epl_player_stats_MD20 <- epl_player_stats_MD20_raw %>%
rename_at(vars(17:21),
~ glue::glue("{colnames(epl_player_stats_MD20_raw)[17:21]}_p90")) %>%
rename_at(vars(25:29),
~ glue::glue("{colnames(epl_player_stats_MD20_raw)[25:29]}_p90"))
epl_player_stats_MD20 <- epl_player_stats_MD20 %>%
rename_at(vars(contains("_1")), ~ str_replace(., "_1", "")) %>%
select(-Matches, -Rk)
glimpse(epl_player_stats_MD20)
```
## clean
```{r}
goal_contribution_clean_df <- epl_player_stats_MD20 %>%
left_join(epl_squad_stats_MD20, by = "Squad") %>%
separate(Player, into = c("fullname", "allname"), sep = "\\\\") %>%
separate(fullname, into = c("firstname", "lastname"),
sep = "\\s", extra = "merge",
remove = FALSE) %>%
mutate(fname = str_extract(firstname, "[A-Z]{1}")) %>%
## players like Fabinho listed without Tavares last name
mutate(player = if_else(is.na(lastname), firstname, lastname)) %>%
mutate(player = case_when(
!is.na(lastname) ~ glue("{fname}. {lastname}"),
TRUE ~ firstname)) %>%
group_by(fullname) %>%
mutate(goal_contrib = Gls / Gls_squad,
assist_contrib = Ast / Gls_squad) %>%
ungroup() %>%
## keep fullname so can differentiate A. Gomes (Andre/Angel), etc.
select(player, fullname, Pos, Squad, Min,
Gls, Gls_squad, PK, PK_squad,
Ast, Ast_squad,
goal_contrib, assist_contrib,
npxG_p90, xA_p90, npxG, xA)
glimpse(goal_contribution_clean_df)
```
```{r}
xG_xA_per90 <- goal_contribution_clean_df
```
```{r}
xG_xA_per90 %>%
filter(Min >= 900,
Pos %in% c("FW", "FWMF")) %>%
summarize(avg_npxg90 = median(npxG_p90),
avg_xA = median(xA_p90))
```
## plot
```{r}
desc_jesus <- "Jesus leads the league with 0.87 np-xG per 90, however, until the Everton game he had only converted 6 goals from 8.5 xG. Now he has improved to 8 goals from 9.6 np-xG!"
desc_aguero_sterling <- "With only 1 goal in the last 6 league games City is in desperate need for Aguero to get firing again as his 8 np-Goals from 6.6 np-xG is more clinical than Gabriel Jesus. Raheem Sterling is City's top scorer with 11 goals from 8.7 np-xG and is in the top 10 for xG and xA per 90 showing his dual threat."
desc_mahrez_debruyne <- "Mahrez & De Bruyne create the chances that Sterling, Aguero, and to a lesser extent - Gabriel Jesus convert. The two lead the league in Passes into the Penalty Area (PPA) while De Bruyne racks up 3.86 Key Passes (KP) per 90, the best in the league by far."
```
```{r fig.width = 12, fig.height = 10}
xG_xA_per90_plot <- xG_xA_per90 %>%
filter(Min >= 900) %>%
ggplot(aes(xA_p90, npxG_p90)) +
geom_point(data = xG_xA_per90 %>%
filter(xA_p90 < 0.13 | npxG_p90 < 0.29,
Min >= 900),
color = "grey20", size = 4, alpha = 0.2) +
geom_point(data = xG_xA_per90 %>%
filter(xA_p90 > 0.13 | npxG_p90 > 0.29,
Min >= 900),
color = "red", size = 4) +
geom_hline(yintercept = 0.29, color = "red", alpha = 0.6) +
geom_vline(xintercept = 0.13, color = "red", alpha = 0.6) +
geom_text_repel(
data = xG_xA_per90 %>%
filter(xA_p90 > 0.13 | npxG_p90 > 0.29,
!player %in% c("G. Jesus", "R. Sterling", "S. Agüero",
"R. Mahrez", "K. De Bruyne", "S. Mané"),
Min >= 900),
aes(label = player, family = "Roboto Condensed", fontface = "bold"),
seed = 15, size = 3.5,
min.segment.length = 0, segment.color = "red",
point.padding = 0.5) +
geom_text(
data = xG_xA_per90 %>%
filter(player == "S. Mané"),
aes(label = player, family = "Roboto Condensed", fontface = "bold"),
size = 3.5, nudge_y = -0.015) +
geom_text(
data = xG_xA_per90 %>%
filter(player == "R. Sterling"),
aes(label = player, family = "Roboto Condensed", fontface = "bold"),
size = 4.5, nudge_x = 0.03, nudge_y = 0.025,
color = "#6CABDD") +
geom_text_repel(
data = xG_xA_per90 %>%
filter(player %in% c("S. Agüero",
"R. Mahrez", "K. De Bruyne")),
aes(label = player, family = "Roboto Condensed", fontface = "bold"),
seed = 15, size = 4.5, color = "#6CABDD",
min.segment.length = 0, segment.color = "red",
point.padding = unit(3, "mm")) +
geom_mark_circle(
aes(filter = player == "G. Jesus",
label = "Gabi Jesus: The Finisher",
description = desc_jesus),
label.width = unit(60, 'mm'), label.buffer = unit(5, "mm"),
label.family = "Roboto Condensed", label.fontsize = c(14, 12),
label.colour = c("#6CABDD", "black")) +
geom_mark_hull(
aes(filter = player %in% c("R. Sterling", "S. Agüero"),
label = "City's Hybrid Threats",
description = desc_aguero_sterling),
expand = unit(0.05, "mm"), label.width = unit(80, 'mm'),
label.buffer = unit(20, "mm"), label.fontsize = c(14, 11),
label.family = "Roboto Condensed",
label.colour = c("#6CABDD", "black")) +
geom_mark_hull(
aes(filter = player %in% c("R. Mahrez", "K. De Bruyne"),
label = "City's Creators-in-Chief",
description = desc_mahrez_debruyne),
expand = unit(2.22, "mm"), label.width = unit(100, 'mm'),
label.buffer = unit(10, "mm"), label.fontsize = c(14, 11),
label.family = "Roboto Condensed",
label.colour = c("#6CABDD", "black")) +
annotate("text", family = "Roboto Condensed", fontface = "bold",
x = 0.132, y = 0.9, hjust = 0, color = "red",
label = "Average xA per 90: 0.14") +
annotate("text", family = "Roboto Condensed", fontface = "bold",
x = 0.57, y = 0.3, color = "red",
label = "Average np-xG per 90: 0.29") +
scale_x_continuous(labels = seq(0, 0.9, 0.1),
breaks = seq(0, 0.9, 0.1),
limits = c(0, 0.65)) +
scale_y_continuous(labels = seq(0, 0.9, 0.1),
breaks = seq(0, 0.9, 0.1),
limits = c(0, 0.9)) +
labs(title = "<b style='color: #6CABDD'>City's Attackers </b> Dominate the 'Expected' Stats: xG per 90 & xA per 90",
subtitle = glue("
Premier League (2019-2020 Season) | Matchday 21 | January 3rd, 2020
<p><b style='color: red'>Average (Median)</b> for Midfielders/Forwards | Minimum 900 Minutes Played"),
caption = glue("
Data: FBref | StatsBomb
Ryo Nakagawara, Twitter: @R_by_Ryo"),
x = "Expected Assists (xA) per 90",
y = "non-Penalty Expected Goals (np-xG) per 90") +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed"),
plot.title = element_markdown(size = 18),
plot.subtitle = element_markdown(size = 16),
plot.caption = element_text(size = 14),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12),
panel.grid.minor.x = element_blank())
xG_xA_per90_plot
```
#### save
```{r}
ggsave(plot = xG_xA_per90_plot,
here::here("Premier League 2019-2020/output/xA_xG_per90_1920_MD21.png"),
height = 10, width = 12)
```
```{r}
plot_logo <- add_logo(
plot_path = here::here("Premier League 2019-2020/output/xA_xG_per90_1920_MD21.png"),
logo_path = "https://upload.wikimedia.org/wikipedia/en/f/f2/Premier_League_Logo.svg",
logo_position = "top right",
logo_scale = 6)
plot_logo
```
```{r}
image_write(image = plot_logo,
here::here("Premier League 2019-2020/output/xA_xG_per90_1920_MD21_logo.png"))
```
@Ryo-N7
Copy link
Author

Ryo-N7 commented Jan 30, 2020

xA_xG_per90_1920_MD21_logo

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment