Skip to content

Instantly share code, notes, and snippets.

@Ryo-N7
Last active November 21, 2019 09:43
Show Gist options
  • Save Ryo-N7/e439c5da60bb80c8c9ce646e87149e56 to your computer and use it in GitHub Desktop.
Save Ryo-N7/e439c5da60bb80c8c9ce646e87149e56 to your computer and use it in GitHub Desktop.
Visualize the EPL, Part 1
## Packages
pacman::p_load(tidyverse, polite, scales, ggimage, ggforce,
understatr, cowplot, kableExtra, ggbeeswarm,
jsonlite, xml2, qdapRegex, stringi, stringr,
rvest, glue, extrafont, ggrepel, magick, ggtext)
loadfonts(quiet = TRUE)
## xPts Table
premierleague2019 <- get_league_teams_stats("EPL", 2019)
## NON-PENALTY xG
xpts_table <- premierleague2019 %>%
group_by(team_name) %>%
summarize(xPts = sum(xpts),
Points = sum(pts),
W = sum(wins),
D = sum(draws),
L = sum(loses),
For = sum(scored),
Against = sum(missed),
xG = sum(npxG),
xGA = sum(npxGA)) %>%
mutate(xPts = round(xPts, digits = 1),
xG = round(xG, digits = 1),
xGA = round(xGA, digits = 1),
GD = For - Against,
xGD = xG - xGA) %>%
arrange(-Points, -GD, -For) %>%
mutate(real_rank = row_number(),
PD = xPts - Points) %>%
mutate(team_name = glue("{team_name} ({real_rank})")) %>%
arrange(-xPts, -xG, -For) %>%
mutate(rank_diff = real_rank - row_number(),
GD_diff = GD - xGD) %>%
select(team_name, real_rank, rank_diff, xPts, Points, PD,
W, D, L, For, Against, GD,
xG, xGA, xGD, GD_diff) %>%
select(-real_rank, -rank_diff, -GD_diff)
xpts_table %>%
rename(`Team (Actual Rank)` = team_name) %>%
knitr::kable(format = "html",
caption = "League Table by xPts") %>%
kable_styling(full_width = FALSE,
bootstrap_options = c("condensed", "responsive")) %>%
add_header_above(c(" ", "Points" = 3, "Result" = 3, "Goals" = 3,
"Expected Goals" = 3)) %>%
column_spec(1:2, bold = TRUE) %>%
row_spec(1:4, bold = TRUE, color = "white", background = "green") %>%
row_spec(5:17, bold = TRUE, color = "grey", background = "white") %>%
row_spec(18:20, color = "white", background = "red")
## xGD plot
## fig.height = 7, fig.width = 9
xGD_plot <- premierleague2019 %>%
select(team_name, everything()) %>%
group_by(team_name) %>%
summarize(sum_npxG = sum(npxG),
sum_npxGA = sum(npxGA)) %>%
mutate(npxGD = sum_npxG - sum_npxGA,
team_name = as_factor(team_name),
team_name = fct_reorder(team_name, npxGD),
half = npxGD / 2) %>%
arrange(desc(npxGD)) %>%
ggplot(aes(x = team_name, y = npxGD)) +
geom_col(color = "black", fill = "#00ff85") +
geom_text(aes(y = half, label = round(npxGD, digits = 1)),
color = "#38003c", size = 4, fontface = "bold",
family = "Roboto Condensed") +
scale_y_continuous(expand = c(0.01, 0)) +
labs(title = "Non-Penalty Expected Goal Difference",
subtitle = "As of Nov. 10, 2019 (Matchday 12)",
x = NULL, y = "Non-Penalty xGD",
caption = glue("Twitter: @R_by_Ryo Data: understat.com")) +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed",
color = "white"),
title = element_text(size = 18),
axis.text = element_text(color = "white", size = 14),
plot.background = element_rect(fill = "#38003c")) +
coord_flip()
xGD_plot
ggsave(plot = xGD_plot,
filename = here::here("Premier League 2019-2020/output/xGD_plot.png"),
height = 7, width = 9)
## add_logo function from Thomas Mock
add_logo <- function(plot_path, logo_path, logo_position, logo_scale = 10){
# Requires magick R Package https://github.com/ropensci/magick
# Useful error message for logo position
if (!logo_position %in% c("top right", "top left", "bottom right", "bottom left")) {
stop("Error Message: Uh oh! Logo Position not recognized\n Try: logo_positon = 'top left', 'top right', 'bottom left', or 'bottom right'")
}
# read in raw images
plot <- magick::image_read(plot_path)
logo_raw <- magick::image_read(logo_path)
# get dimensions of plot for scaling
plot_height <- magick::image_info(plot)$height
plot_width <- magick::image_info(plot)$width
# default scale to 1/10th width of plot
# Can change with logo_scale
logo <- magick::image_scale(logo_raw, as.character(plot_width/logo_scale))
# Get width of logo
logo_width <- magick::image_info(logo)$width
logo_height <- magick::image_info(logo)$height
# Set position of logo
# Position starts at 0,0 at top left
# Using 0.01 for 1% - aesthetic padding
if (logo_position == "top right") {
x_pos = plot_width - logo_width - 0.01 * plot_width
y_pos = 0.01 * plot_height
} else if (logo_position == "top left") {
x_pos = 0.01 * plot_width
y_pos = 0.01 * plot_height
} else if (logo_position == "bottom right") {
x_pos = plot_width - logo_width - 0.01 * plot_width
y_pos = plot_height - logo_height - 0.01 * plot_height
} else if (logo_position == "bottom left") {
x_pos = 0.01 * plot_width
y_pos = plot_height - logo_height - 0.01 * plot_height
}
# Compose the actual overlay
magick::image_composite(plot, logo, offset = paste0("+", x_pos, "+", y_pos))
}
xGD_plot_logo <- add_logo(
plot_path = here::here("Premier League 2019-2020/output/xGD_plot.png"),
logo_path = "https://static.dezeen.com/uploads/2016/08/designstudiopremier-league-rebrand-relaunch-logo-design-barclays-football_dezeen_slideshow-a-852x609.jpg",
logo_position = "top right",
logo_scale = 10)
image_write(image = xGD_plot_logo,
path = here::here("Premier League 2019-2020/output/xGD_plot_logo.png"))
## xG & xGA per game
## fig.height = 7, fig.width = 9
xGperGame_plot <- premierleague2019 %>%
select(team_name, everything()) %>%
group_by(team_name) %>%
summarize(sum_npxG = sum(npxG),
sum_npxGA = sum(npxGA)) %>%
mutate(npxGD = sum_npxG - sum_npxGA,
npxGperGame = sum_npxG / 12,
team_name = as_factor(team_name),
team_name = fct_reorder(team_name, npxGperGame),
half = npxGperGame / 2) %>%
ungroup() %>%
arrange(desc(npxGperGame)) %>%
ggplot(aes(x = team_name, y = npxGperGame)) +
geom_col(color = "black", fill = "#00ff85") +
geom_text(aes(y = half, label = round(npxGperGame, digits = 2)),
color = "#38003c", size = 5, fontface = "bold",
family = "Roboto Condensed") +
scale_y_continuous(expand = c(0.01, 0)) +
labs(title = "Non-Penalty xG per Game",
subtitle = "As of Nov. 10, 2019 (Matchday 12)",
x = NULL, y = "Non-Penalty xG per Game",
caption = glue(" ")) +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed",
color = "white"),
title = element_text(size = 18),
axis.text = element_text(color = "white", size = 14),
plot.background = element_rect(fill = "#38003c")) +
coord_flip()
xGperGame_plot
## fig.height = 7, fig.width = 9
xGAperGame_plot <- premierleague2019 %>%
select(team_name, everything()) %>%
group_by(team_name) %>%
summarize(sum_npxG = sum(npxG),
sum_npxGA = sum(npxGA)) %>%
mutate(npxGD = sum_npxG - sum_npxGA,
npxGAperGame = sum_npxGA / 12,
team_name = as_factor(team_name),
team_name = fct_reorder(team_name, npxGAperGame),
half = npxGAperGame / 2) %>%
ungroup() %>%
#arrange(desc(npxGAperGame)) %>%
ggplot(aes(x = fct_rev(team_name), y = npxGAperGame)) +
geom_col(color = "black", fill = "#00ff85") +
geom_text(aes(y = half, label = round(npxGAperGame, digits = 2)),
color = "#38003c", size = 5, fontface = "bold",
family = "Roboto Condensed") +
scale_y_continuous(expand = c(0.01, 0)) +
labs(title = "Non-Penalty xGA per Game",
subtitle = "As of Nov. 10, 2019 (Matchday 12)",
x = NULL, y = "Non-Penalty xGA per Game",
caption = glue("Twitter: @R_by_Ryo Data: understat.com")) +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed",
color = "white"),
title = element_text(size = 18),
axis.text = element_text(color = "white", size = 14),
plot.background = element_rect(fill = "#38003c")) +
coord_flip()
xGAperGame_plot
## fig.height = 7, fig.width = 18
xG_AperGame_plot <- cowplot::plot_grid(xGperGame_plot, xGAperGame_plot)
ggsave(plot = xG_AperGame_plot,
filename = here::here("Premier League 2019-2020/output/xG_AperGame_plot.png"),
height = 7, width = 18)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment