Created
January 14, 2020 10:04
-
-
Save Ryo-N7/fe35f47525e12c59e52deb5c24e88503 to your computer and use it in GitHub Desktop.
np-xG per 90 vs. xA per 90 plot (Bundesliga 2019-2020, Hinrunde)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pkgs | |
```{r, message=FALSE, warning=FALSE} | |
pacman::p_load(tidyverse, scales, ggforce, ggtext, | |
rvest, glue, extrafont, ggrepel, magick) | |
loadfonts() | |
``` | |
## add_logo | |
```{r} | |
add_logo <- function(plot_path, logo_path, logo_position, | |
logo_scale = 10){ | |
# Requires magick R Package https://github.com/ropensci/magick | |
# Useful error message for logo position | |
if (!logo_position %in% c("top right", "top left", "bottom right", "bottom left")) { | |
stop("Error Message: Uh oh! Logo Position not recognized\n Try: logo_positon = 'top left', 'top right', 'bottom left', or 'bottom right'") | |
} | |
# read in raw images | |
plot <- magick::image_read(plot_path) | |
logo_raw <- magick::image_read(logo_path) | |
# get dimensions of plot for scaling | |
plot_height <- magick::image_info(plot)$height | |
plot_width <- magick::image_info(plot)$width | |
# default scale to 1/10th width of plot | |
# Can change with logo_scale | |
logo <- magick::image_scale(logo_raw, as.character(plot_width/logo_scale)) | |
# Get width of logo | |
logo_width <- magick::image_info(logo)$width | |
logo_height <- magick::image_info(logo)$height | |
# Set position of logo | |
# Position starts at 0,0 at top left | |
# Using 0.01 for 1% - aesthetic padding | |
if (logo_position == "top right") { | |
x_pos = plot_width - logo_width - 0.01 * plot_width | |
y_pos = 0.01 * plot_height | |
} else if (logo_position == "top left") { | |
x_pos = 0.01 * plot_width | |
y_pos = 0.01 * plot_height | |
} else if (logo_position == "bottom right") { | |
x_pos = plot_width - logo_width - 0.01 * plot_width | |
y_pos = plot_height - logo_height - 0.01 * plot_height | |
} else if (logo_position == "bottom left") { | |
x_pos = 0.01 * plot_width | |
y_pos = plot_height - logo_height - 0.01 * plot_height | |
} | |
# Compose the actual overlay | |
magick::image_composite(plot, logo, offset = paste0("+", x_pos, "+", y_pos)) | |
} | |
``` | |
# Bundesliga data (FBref) | |
- Save "Player Standard Stats" & "Squad Standard Stats" as a `.csv` file or whatever you prefer. | |
- https://fbref.com/en/comps/20/stats/Bundesliga-Stats | |
## squad stats | |
```{r} | |
buli_squad_stats_hinrunde_raw <- read_csv(here::here("data/buli_squad_stats_hinrunde.csv"), | |
skip = 1, col_names = TRUE) | |
buli_squad_stats_hinrunde <- buli_squad_stats_hinrunde_raw %>% | |
rename_at(vars(12:16), | |
~ glue::glue("{colnames(buli_squad_stats_hinrunde_raw)[12:16]}_p90")) %>% | |
rename_at(vars(20:24), | |
~ glue::glue("{colnames(buli_squad_stats_hinrunde_raw)[20:24]}_p90")) | |
buli_squad_stats_hinrunde <- buli_squad_stats_hinrunde %>% | |
rename_at(vars(2:24), ~ glue::glue("{colnames(buli_squad_stats_hinrunde)[2:24]}_squad")) %>% | |
rename_at(vars(contains("_1")), ~ str_replace(., "_1", "")) | |
glimpse(buli_squad_stats_hinrunde) | |
``` | |
```{r} | |
## save | |
saveRDS(buli_squad_stats_hinrunde, file = glue("{here::here()}/data/buli_squad_stats_hinrunde.RDS")) | |
buli_squad_stats_hinrunde <- readRDS(file = glue("{here::here()}/data/buli_squad_stats_hinrunde.RDS")) | |
``` | |
## player stats | |
```{r} | |
buli_player_stats_hinrunde_raw <- read_csv(here::here("data/buli_player_stats_hinrunde.csv"), | |
skip = 1, col_names = TRUE) | |
buli_player_stats_hinrunde <- buli_player_stats_hinrunde_raw %>% | |
rename_at(vars(17:21), | |
~ glue::glue("{colnames(buli_player_stats_hinrunde_raw)[17:21]}_p90")) %>% | |
rename_at(vars(25:29), | |
~ glue::glue("{colnames(buli_player_stats_hinrunde_raw)[25:29]}_p90")) | |
buli_player_stats_hinrunde <- buli_player_stats_hinrunde %>% | |
rename_at(vars(contains("_1")), ~ str_replace(., "_1", "")) %>% | |
select(-Matches, -Rk) | |
glimpse(buli_player_stats_hinrunde) | |
``` | |
## clean | |
```{r} | |
goal_contribution_clean_df <- buli_player_stats_hinrunde %>% | |
left_join(buli_squad_stats_hinrunde, by = "Squad") %>% | |
separate(Player, into = c("fullname", "allname"), sep = "\\\\") %>% | |
separate(fullname, into = c("firstname", "lastname"), | |
sep = "\\s", extra = "merge", | |
remove = FALSE) %>% | |
mutate(fname = str_extract(firstname, "[A-Z]{1}")) %>% | |
## players like Fabinho listed without Tavares last name | |
mutate(player = if_else(is.na(lastname), firstname, lastname)) %>% | |
mutate(player = case_when( | |
!is.na(lastname) ~ glue("{fname}. {lastname}"), | |
TRUE ~ firstname)) %>% | |
group_by(fullname) %>% | |
mutate(goal_contrib = Gls / Gls_squad, | |
assist_contrib = Ast / Gls_squad) %>% | |
ungroup() %>% | |
## keep fullname so can differentiate A. Gomes (Andre/Angel), etc. | |
select(player, fullname, Pos, Squad, Min, | |
Gls, Gls_squad, PK, PK_squad, | |
Ast, Ast_squad, | |
goal_contrib, assist_contrib, | |
npxG_p90, xA_p90, npxG, xA) | |
glimpse(goal_contribution_clean_df) | |
``` | |
## plot | |
```{r} | |
xG_xA_per90 <- goal_contribution_clean_df | |
``` | |
```{r} | |
xG_xA_per90 %>% | |
filter(Min >= 900, | |
Pos %in% c("FW", "FWMF")) %>% | |
summarize(avg_npxg90 = median(npxG_p90), | |
avg_xA = median(xA_p90)) | |
``` | |
- Lewa: 14 > 12 np 8.3 np-xG | |
- Muller: 0.32 xG per 90 (Kovac), 0.4 xG per 90 (Flick) | |
-- 512 mins Flick 0.53 xA per 90 (Kovac), 0.58 xA per 90 (Flick) | |
- Gnabry: | |
-- | |
- Plea: 0.48 npxG per 90, 0.28 xA per 90 | |
- Thuram: 0.39 npxG per 90, 0.27 xA per 90 | |
- Embolo: 0.34 npxG per 90, 0.27 xA per 90 | |
```{r} | |
desc_bayern <- "Despite Niko Kovac's poor tactics & his eventual sacking, Bayern maintained great attacking output mainly through Robert Lewandowski's great finishing (12 np Goals from 8.3 np xG under Kovac)! Thomas Müller, having been frozen out by Kovac, got back to form under new manager Hansi Flick (0.32 xG per 90 vs. 0.4 xG per 90 & 0.53 xA per 90 vs. 0.58 xA per 90). Serge Gnabry has been a consistent threat throughout while Phil Coutinho has been doing well after a slow start." | |
desc_gladbach <- "Although he has cooled off a bit from an electric 4 goals & 4 assists in the first 7 games, Alassane Plea still provides the most threat with a team-leading 0.48 np-xG per 90 & 0.28 xA per 90 (Patrick Herrmann with 0.54 xG per 90 & 0.32 xA per 90 just misses out having only played 790 minutes). Plea is supported by the new arrivals Breel Embolo & Marcus Thuram who complete this dynamic trident that excels on transition plays." | |
``` | |
```{r fig.width = 12, fig.height = 10} | |
xG_xA_per90_plot <- xG_xA_per90 %>% | |
filter(Min >= 900) %>% | |
ggplot(aes(xA_p90, npxG_p90)) + | |
geom_point(data = xG_xA_per90 %>% | |
filter(xA_p90 < 0.15 | npxG_p90 < 0.3, | |
Min >= 900), | |
color = "grey20", size = 4, alpha = 0.2) + | |
geom_point(data = xG_xA_per90 %>% | |
filter(xA_p90 > 0.15 | npxG_p90 > 0.3, | |
Min >= 900), | |
color = "red", size = 4) + | |
geom_hline(yintercept = 0.3, alpha = 0.6) + | |
geom_vline(xintercept = 0.15, alpha = 0.6) + | |
## All player labels | |
geom_text_repel( | |
data = xG_xA_per90 %>% | |
filter(xA_p90 > 0.15 | npxG_p90 > 0.3, | |
!player %in% c("R. Lewandowski", | |
"S. Gnabry", "T. Müller", | |
"P. Coutinho", | |
"M. Thuram", | |
"B. Embolo", "A. Pléa", | |
"K. Volland", | |
"F. Niederlechner", | |
"T. Werner"), | |
Min >= 900), | |
aes(label = player, family = "Roboto Condensed", | |
fontface = "bold"), | |
seed = 15, size = 3.5, | |
min.segment.length = 0, segment.color = "red", | |
point.padding = 0.5) + | |
## Separate player | |
geom_text(data = xG_xA_per90 %>% | |
filter(player %in% c("T. Werner", | |
"F. Niederlechner", | |
"K. Volland")), | |
aes(label = player, family = "Roboto Condensed", | |
fontface = "bold"), | |
size = 3.5, nudge_x = 0.01, hjust = 0) + | |
## Bayern player labels | |
geom_text_repel( | |
data = xG_xA_per90 %>% | |
filter(player %in% c("R. Lewandowski", "P. Coutinho", | |
"S. Gnabry", "T. Müller")), | |
aes(label = player, family = "Roboto Condensed", | |
fontface = "bold"), | |
seed = 15, size = 4.5, color = "red", | |
min.segment.length = 0, segment.color = "red", | |
point.padding = 0.5, nudge_y = 0.05) + | |
## Gladbach player labels | |
geom_text_repel( | |
data = xG_xA_per90 %>% | |
filter(player %in% c("M. Thuram", | |
"B. Embolo", "A. Pléa")), | |
aes(label = player, family = "Roboto Condensed", | |
fontface = "bold"), | |
seed = 15, size = 4.5, color = "#228B22", | |
min.segment.length = 0, segment.color = NA, | |
point.padding = 0.5) + | |
## Bayern description | |
geom_mark_hull( | |
aes(filter = player %in% c("R. Lewandowski", | |
"S. Gnabry", | |
"T. Müller", | |
"P. Coutinho"), | |
label = "Bayern's Attack Remains Strong.", | |
description = desc_bayern), | |
expand = unit(2.5, "mm"), con.cap = unit(0, "mm"), | |
label.width = unit(130, 'mm'), label.buffer = unit(5, "mm"), | |
label.family = "Roboto Condensed", label.fontsize = c(14, 12), | |
label.colour = c("red", "black")) + | |
## Gladbach description | |
geom_mark_hull( | |
aes(filter = player %in% c("M. Thuram", | |
"B. Embolo"), | |
label = "Gladbach's Attacking Trident", | |
description = desc_gladbach), | |
expand = unit(0.05, "mm"), con.cap = unit(0, "mm"), | |
label.width = unit(65, 'mm'), label.buffer = unit(49, "mm"), | |
label.family = "Roboto Condensed", label.fontsize = c(14, 11), | |
label.colour = c("#228B22", "black")) + | |
geom_mark_circle( | |
aes(filter = player == "A. Pléa"), | |
expand = unit(3, "mm"), label.width = unit(50, 'mm'), | |
label.buffer = unit(30, "mm"), label.fontsize = c(14, 11), | |
label.family = "Roboto Condensed", | |
label.colour = c("#228B22", "black")) + | |
## Gladbach desc. | |
annotate("segment", | |
x = 0.288, xend = 0.37, | |
y = 0.48, yend = 0.48) + | |
annotate("segment", | |
x = 0.37, xend = 0.37, | |
y = 0.48, yend = 0.348) + | |
## xG and xA league average | |
annotate("text", family = "Roboto Condensed", fontface = "bold", | |
x = 0.05, y = 0.94, hjust = 0, #color = "red", | |
label = "Average xA per 90: 0.15") + | |
annotate("text", family = "Roboto Condensed", fontface = "bold", | |
x = 0.62, y = 0.29, #color = "red", | |
label = "Average np-xG per 90: 0.3") + | |
scale_x_continuous(labels = seq(0, 0.9, 0.1), | |
breaks = seq(0, 0.9, 0.1), | |
limits = c(0, 0.65)) + | |
scale_y_continuous(labels = seq(0, 0.9, 0.1), | |
breaks = seq(0, 0.9, 0.1), | |
limits = c(0, 0.95)) + | |
labs(title = "<b style='color: #228B22'>Gladbach's Trident</b> & <b style='color: red'>Bayern's Attackers</b> Lead the League in xG per 90 & xA per 90", | |
subtitle = glue(" | |
Bundesliga (2019-2020) | Hinrunde | January 14th, 2020 | |
<p><b style='color: black'>Average (Median)</b> for Midfielders/Forwards | Minimum 900 Minutes Played"), | |
caption = glue(" | |
Data: FBref | StatsBomb | |
Ryo Nakagawara, Twitter: @R_by_Ryo"), | |
x = "Expected Assists (xA) per 90", | |
y = "non-Penalty Expected Goals (np-xG) per 90") + | |
theme_minimal() + | |
theme(text = element_text(family = "Roboto Condensed"), | |
plot.title = element_markdown(size = 18), | |
plot.subtitle = element_markdown(size = 16), | |
plot.caption = element_text(size = 14), | |
axis.title = element_text(size = 14), | |
axis.text = element_text(size = 12), | |
panel.grid.minor.x = element_blank()) | |
xG_xA_per90_plot | |
``` | |
#### save | |
```{r} | |
ggsave(plot = xG_xA_per90_plot, | |
here::here("Bundesliga 2019-2020/output/xA_xG_per90_1920_hinrunde.png"), | |
height = 10, width = 12) | |
``` | |
```{r} | |
plot_logo <- add_logo( | |
plot_path = here::here("Bundesliga 2019-2020/output/xA_xG_per90_1920_hinrunde.png"), | |
logo_path = here::here("../soccer_match_reports/img/Bundesliga_logo_(2017).svg"), | |
logo_position = "top right", | |
logo_scale = 14) | |
plot_logo | |
``` | |
```{r} | |
image_write(image = plot_logo, | |
here::here("Bundesliga 2019-2020/output/xA_xG_per90_1920_hinrunde_logo.png")) | |
``` | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment