Skip to content

Instantly share code, notes, and snippets.

@Ryo-N7
Last active May 7, 2020 11:05
Show Gist options
  • Save Ryo-N7/bc27152c3c94dedbe7262973063ae588 to your computer and use it in GitHub Desktop.
Save Ryo-N7/bc27152c3c94dedbe7262973063ae588 to your computer and use it in GitHub Desktop.
Premier League Top Goalscorers by Minutes Played
# pkgs
```{r, message=FALSE, warning=FALSE}
pacman::p_load(dplyr, tidyr, purrr, scales,
ggplot2, ggtext, tibble, stringr,
glue, extrafont, ggbump, forcats)
loadfonts()
```
## raw data (scraped from transfermarkt)
```{r}
## Load the scraped goal timeline from the project's data folder.
## here::here() resolves paths from the project root, i.e. a local copy of
## https://github.com/Ryo-N7/soccer_ggplots
## Data file in that repo:
## https://github.com/Ryo-N7/soccer_ggplots/data/goal_timeline_df_raw_42920.RDS
goal_timeline_df_raw <- readRDS(
  file = here::here("data/goal_timeline_df_raw_42920.RDS")
)
```
I used the indices/cumsum functions created for the scraped data to find the cumulative sums of minutes, goals, etc., and built a new data frame from them:
```{r}
## One row per player, in descending order of total goals.
## min50 / min100 / min150 / minTOTAL: cumulative minutes played when the
## player reached 50, 100, 150 and their final goal tally.
epl_topscorers_df_clean <- tribble(
  ~player,         ~nationality, ~min50, ~min100, ~min150, ~minTOTAL, ~total_mins, ~total_goals, ~total_assists,
  "Alan Shearer",  "England",      5351,   10586,   18307,     38190,       38200,          260,             12,
  "Wayne Rooney",  "England",     10438,   18607,   25303,     36969,       38027,          208,            111,
  "Andy Cole",     "England",      5844,   14962,   22458,     30982,       31613,          187,             32,
  "Sergio Agüero", "Argentina",    5709,   10672,   16099,     18943,       19250,          180,             53,
  "Frank Lampard", "England",     23360,   34499,   44172,     48866,       48912,          177,            118,
  "Thierry Henry", "France",       6146,   13016,   18398,     21310,       21310,          175,             77,
  "Robbie Fowler", "England",      7619,   14971,   25610,     27509,       27964,          163,             12,
  "Jermain Defoe", "England",     11163,   19538,   29190,     32411,       32651,          162,             27,
  "Michael Owen",  "England",      7646,   14116,   22986,     22986,       23021,          150,              8
)

glimpse(epl_topscorers_df_clean)
```
# reshape
```{r}
## Build the long-format plotting data:
##   1. add goals/assists per 90 minutes,
##   2. pivot the four milestone columns into `milestone` / `value` pairs,
##   3. attach each player's line colour as a literal hex code,
##   4. turn `milestone` into a factor ordered min50 -> minTOTAL so the
##      x-axis runs in milestone order.
epl_topscorers_df <- epl_topscorers_df_clean %>%
  mutate(
    goalsper90 = total_goals / total_mins * 90,
    assistsper90 = total_assists / total_mins * 90
  ) %>%
  pivot_longer(
    cols = min50:minTOTAL,
    names_to = "milestone", values_to = "value"
  ) %>%
  mutate(
    ## These hex codes must match the ones used for the same player in
    ## `info_df` and in the hand-drawn connector curves of the plot,
    ## otherwise a player's line changes shade at the separator.
    color = case_when(
      player == "Alan Shearer" ~ "#241F20",
      player == "Sergio Agüero" ~ "#6CABDD",
      player == "Wayne Rooney" ~ "#FF8C00",
      player == "Andy Cole" ~ "#009036",
      player == "Frank Lampard" ~ "#034694",
      player == "Robbie Fowler" ~ "#fdbc00",
      ## was "#c8102E", which disagreed with the "#d00027" used for Owen's
      ## label and connector curves
      player == "Michael Owen" ~ "#d00027",
      player == "Jermain Defoe" ~ "#132257",
      player == "Thierry Henry" ~ "#b2101d",
      TRUE ~ "grey80"
    ),
    milestone = as_factor(milestone) %>%
      fct_relevel("min50", "min100", "min150", "minTOTAL")
  )

## Axis labels for the four milestones, in factor-level order.
## can use glue or str_wrap for indenting as well...
x_labels <- c(
  "Number of Minutes\nto Reach 50 Goals",
  "...to Reach 100 goals",
  "...to Reach 150 goals",
  "...to Reach \nTotal Premier League Goals"
)

glimpse(epl_topscorers_df)
```
## info text (for right-hand-side ordered top goal scorers)
```{r}
## Right-hand label column: one "<player>: <total goals>" entry per player,
## listed in descending goal order and spaced 5,000 minutes apart on the
## y-axis. `color` holds the literal hex code used to draw each label.
info_df <- tibble(
  x = 5.3, # length-1 value is recycled to all nine rows
  y = seq(7500, 47500, by = 5000),
  text = c(
    "Alan Shearer: 260",
    "Wayne Rooney: 208",
    "Andy Cole: 187",
    "Sergio Agüero: 180",
    "Frank Lampard: 177",
    "Thierry Henry: 175",
    "Robbie Fowler: 163",
    "Jermain Defoe: 162",
    "Michael Owen: 150"
  ),
  color = c(
    "#241F20",
    "#FF8C00",
    "#009036",
    "#6CABDD",
    "#034694",
    "#b2101d",
    "#fdbc00",
    "#132257",
    "#d00027"
  )
)

glimpse(info_df)
```
# plot
```{r fig.width=16, fig.height=11}
## Bump chart of cumulative minutes played at each goal milestone (one line
## per player), plus hand-placed connector arrows on the right that lead from
## the end of each line to that player's label from `info_df`.
## Layers are drawn in the order they are added, so later layers sit on top
## of earlier ones -- keep the order when editing.
ggplot() +
## left arrows (ggbump)
## one smoothed line per player; `color` holds literal hex codes, which
## scale_color_identity() (further down) uses as-is
geom_bump(data = epl_topscorers_df ,
aes(x = milestone, y = value, group = player,
color = color), size = 5, smooth = 7) +
## y-axis arrow
## x = Inf pins the arrow to the right-hand edge of the panel
geom_segment(aes(x = Inf, xend = Inf, y = 1000, yend = 49000), size = 0.8,
arrow = arrow(length = unit(0.6, "cm"))) +
## right arrows
## Each player gets two geom_curve() pieces starting at the separator
## (x = 4), followed by a short geom_segment() that carries the closed
## arrowhead. The arrowhead's y equals that player's y in `info_df`.
## All coordinates, curvatures and angles are hand-tuned.
## Henry
geom_curve(aes(x = 4, xend = 4.2, y = 21310, yend = 26905),
curvature = -0.35, color = "#b2101d",
angle = 95, size = 4.5) +
geom_curve(aes(x = 4.2, xend = 4.4, y = 26905, yend = 32600),
curvature = 0.35, color = "#b2101d", lineend = "round",
angle = 95, size = 4.25) +
geom_segment(aes(x = 4.45, xend = 4.48, y = 32500, yend = 32500),
color = "#b2101d", linejoin = "round", size = 1.25,
arrow = arrow(length = unit(0.8, "cm"), angle = 40,
type = "closed")) +
## Andy Cole
geom_curve(aes(x = 4, xend = 4.2, y = 31150, yend = 27250),
curvature = 0.25, color = "#009036",
angle = 45, size = 4.5) +
geom_curve(aes(x = 4.2, xend = 4.45, y = 27050, yend = 17400),
curvature = -0.45, color = "#009036",
angle = 45, size = 4.5) +
geom_segment(aes(x = 4.45, xend = 4.48, y = 17500, yend = 17500),
color = "#009036", linejoin = "round", size = 1.25,
arrow = arrow(length = unit(0.8, "cm"), angle = 40,
type = "closed")) +
## Robbie Fowler
geom_curve(aes(x = 4, xend = 4.2, y = 27500, yend = 32732),
curvature = -0.35, color = "#fdbc00",
angle = 95, size = 4.5) +
geom_curve(aes(x = 4.2, xend = 4.45, y = 32732, yend = 37600),
curvature = 0.35, color = "#fdbc00", lineend = "round",
angle = 95, size = 4.25) +
geom_segment(aes(x = 4.45, xend = 4.48, y = 37500, yend = 37500),
color = "#fdbc00", linejoin = "round", size = 1.25,
arrow = arrow(length = unit(0.8, "cm"), angle = 40,
type = "closed")) +
## Defoe
geom_curve(aes(x = 4, xend = 4.2, y = 32500, yend = 37575.5),
curvature = -0.35, color = "#132257",
angle = 95, size = 4.5) +
geom_curve(aes(x = 4.2, xend = 4.45, y = 37575.5, yend = 42700),
curvature = 0.35, color = "#132257", lineend = "round",
angle = 95, size = 4.25) +
geom_segment(aes(x = 4.45, xend = 4.48, y = 42500, yend = 42500),
color = "#132257", linejoin = "round", size = 1.25,
arrow = arrow(length = unit(0.8, "cm"), angle = 40,
type = "closed")) +
## Owen
geom_curve(aes(x = 4, xend = 4.2, y = 23021, yend = 35260.5),
curvature = -0.35, color = "#d00027",
angle = 125, size = 4.5) +
geom_curve(aes(x = 4.2, xend = 4.42, y = 35260.5, yend = 47700),
curvature = 0.35, color = "#d00027", lineend = "round",
angle = 115, size = 4.25) +
geom_segment(aes(x = 4.45, xend = 4.48, y = 47500, yend = 47500),
color = "#d00027", linejoin = "round", size = 1.25,
arrow = arrow(length = unit(0.8, "cm"), angle = 40,
type = "closed")) +
## Shearer
geom_curve(aes(x = 4, xend = 4.2, y = 38500, yend = 23000),
curvature = 0.25, color = "#241F20",
angle = 45, size = 4.5) +
geom_curve(aes(x = 4.2, xend = 4.45, y = 23000, yend = 7300),
curvature = -0.45, color = "#241F20",
angle = 45, size = 4.25) +
geom_segment(aes(x = 4.45, xend = 4.48, y = 7500, yend = 7500),
color = "#241F20", linejoin = "round", size = 1.25,
arrow = arrow(length = unit(0.8, "cm"), angle = 40,
type = "closed")) +
## Rooney
geom_curve(aes(x = 3.99, xend = 4.2, y = 37100, yend = 21875),
curvature = 0.25, color = "#FF8C00",
angle = 45, size = 4.5) +
geom_curve(aes(x = 4.2, xend = 4.45, y = 21875, yend = 12400),
curvature = -0.45, color = "#FF8C00",
angle = 45, size = 4.5) +
geom_segment(aes(x = 4.45, xend = 4.48, y = 12500, yend = 12500),
color = "#FF8C00", linejoin = "round", size = 1.25,
arrow = arrow(length = unit(0.8, "cm"), angle = 40,
type = "closed")) +
## Aguero
geom_curve(aes(x = 4, xend = 4.2, y = 19000, yend = 20750),
curvature = -0.45, color = "#6CABDD",
angle = 45, size = 4.5) +
geom_curve(aes(x = 4.2, xend = 4.45, y = 20750, yend = 22500),
curvature = 0.45, color = "#6CABDD", lineend = "round",
angle = 45, size = 4.5) +
geom_segment(aes(x = 4.45, xend = 4.48, y = 22500, yend = 22500),
color = "#6CABDD", linejoin = "round", size = 1.25,
arrow = arrow(length = unit(0.8, "cm"), angle = 40,
type = "closed")) +
## Lampard
geom_curve(aes(x = 4, xend = 4.2, y = 49150, yend = 38375),
curvature = 0.25, color = "#034694",
angle = 45, size = 4.5) +
geom_curve(aes(x = 4.2, xend = 4.4, y = 38375, yend = 27350),
curvature = -0.45, color = "#034694", lineend = "round",
angle = 45, size = 4.25) +
geom_segment(aes(x = 4.45, xend = 4.48, y = 27500, yend = 27500),
color = "#034694", linejoin = "round", size = 1.25,
arrow = arrow(length = unit(0.8, "cm"), angle = 40,
type = "closed")) +
## separator
## added after the curves, so it is drawn over the point where the bump
## lines and the connector curves meet at x = 4
geom_vline(xintercept = 4, color = "grey80", size = 2.5) +
## player labels; hjust = "right" makes x = 5.3 the right-hand edge of the text
geom_text(data = info_df,
aes(x = x, y = y,
label = text, colour = color),
size = 6.5, hjust = "right",
family = "Roboto Slab", fontface = "bold") +
## scales
## identity scale: the hex codes in the `color` columns are used directly
scale_color_identity() +
## expansion(add = c(0.18, 1.4)) pads the discrete x range by 0.18 units on
## the left and 1.4 on the right -- presumably to make room for the
## connector arrows and labels drawn at x > 4
scale_x_discrete(expand = expansion(add = c(0.18, 1.4)),
labels = x_labels, position = "top") +
## reversed y-axis: fewer minutes sit nearer the top; the unnamed string is
## taken as the axis title
scale_y_reverse(position = "right", "Minutes Played\n",
labels = scales::comma) +
## title/caption
## both are drawn as text annotations inside the plotting coordinates
## rather than via labs()
annotate("text", x = 1.015, y = 45000,
label = glue::glue("
Number of Minutes Played to Reach
50, 100, 150, & Career Goals
for Players with 150 or More
Total Premier League Goals"),
hjust = 0, size = 6.75, color = "black",
family = "Roboto Slab", fontface = "bold") +
annotate("text", x = 1.015, y = 51800,
label = glue::glue("
Data: transfermarkt.com | premierleague.com
Date: May 4th, 2020
Graphic: Ryo Nakagawara (Twitter: @R_by_Ryo)"),
hjust = 0, size = 4, color = "black",
family = "Roboto Slab") +
## clip = "off" lets layers draw outside the panel area -- presumably needed
## for the caption at y = 51800, which lies beyond the plotted data values
coord_cartesian(clip = "off") +
## theme: no legend, Roboto Slab throughout, matching panel and plot
## background colour
theme(legend.position = "none",
text = element_text(family = "Roboto Slab", color = "black"),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 18),
axis.text = element_text(size = 16, color = "black",
family = "Roboto Slab"),
panel.grid = element_line(color = "grey80"),
plot.background = element_rect(fill = "#FAEBD7"),
panel.background = element_rect(fill = "#FAEBD7"),
axis.ticks = element_blank(),
plot.margin = margin(40, 30, 40, 40))
```
## save
```{r}
## Write the most recently displayed plot to the project's output folder.
## Width/height (in inches) match the fig.width/fig.height of the plot chunk.
ggsave(
  filename = here::here("Premier League 2019-2020/output/EPL_topscorers_minutes2020.png"),
  width = 16,
  height = 11,
  dpi = 320
)
```
@Ryo-N7
Copy link
Author

Ryo-N7 commented May 4, 2020

EPL_topscorers_minutes2020

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment