Skip to content

Instantly share code, notes, and snippets.

@tonyelhabr
Created December 2, 2024 12:37
Show Gist options
  • Save tonyelhabr/5dfb183b67ccacd966b52bbfcd17935c to your computer and use it in GitHub Desktop.
Save tonyelhabr/5dfb183b67ccacd966b52bbfcd17935c to your computer and use it in GitHub Desktop.

Looking at the biggest differences in 2024/25 xG between the data scraped right after each match and the values later updated to account for ball height and defender positioning.

library(dplyr)
library(tibble)

# Load the joined shooting data used by every step below.
# NOTE(review): presumably one row per shot, carrying paired `xG`/`repo_xG`
# and `PSxG`/`repo_PSxG` columns -- confirm against the script that writes
# this file.
joined_data <- readRDS('joined_fb_match_shooting_big5_20241201.rds')

# The shot-level queries further down select and sort on `d_xG` / `d_PSxG`,
# but this script only derives those columns for the aggregated table.
# Derive whichever one the file does not already provide, using the same
# definition as the aggregate step (value minus its `repo_` counterpart).
# Columns that already exist are left untouched.
if (!('d_xG' %in% names(joined_data))) {
  joined_data <- mutate(joined_data, d_xG = xG - repo_xG)
}
if (!('d_PSxG' %in% names(joined_data))) {
  joined_data <- mutate(joined_data, d_PSxG = PSxG - repo_PSxG)
}


# Total each metric per match/date/squad, then rank the rows by how far the
# summed `xG` and `repo_xG` disagree (largest absolute gap first).
agg_joined_data <- joined_data |>
  group_by(MatchURL, Date, Squad) |>
  summarize(
    across(
      c(xG, repo_xG, PSxG, repo_PSxG),
      # NAs are skipped, so a group whose values are all NA sums to 0.
      \(.x) sum(.x, na.rm = TRUE)
    ),
    # Drop the grouping as part of the summary: no "grouped output by"
    # message and no separate ungroup() step to forget.
    .groups = 'drop'
  ) |>
  mutate(
    # Differences are taken on the summed values: value minus its `repo_`
    # counterpart.
    d_xG = xG - repo_xG,
    d_PSxG = PSxG - repo_PSxG
  ) |>
  arrange(desc(abs(d_xG)))

# Shot-by-shot rows for a single match (Chelsea vs. Brighton, 2024-09-28),
# trimmed to the columns needed to compare the two sets of values.
# Video references for this match:
#   big PSxG change (3rd biggest): https://youtu.be/t7p9-DfgD5M?si=arT3XBWjHvg90Gg1&t=210
#   big xG change (biggest in data set): https://youtu.be/t7p9-DfgD5M?si=F5OsqmGLCb_0QBcH&t=669
# NOTE(review): assumes `joined_data` already has `d_xG` / `d_PSxG` columns --
# confirm.
example_match <- joined_data |>
  filter(
    MatchURL == 'https://fbref.com/en/matches/1714cebe/Chelsea-Brighton-and-Hove-Albion-September-28-2024-Premier-League'
  ) |>
  select(
    all_of(
      c(
        'Minute', 'Player',
        'xG', 'PSxG',
        'repo_xG', 'repo_PSxG',
        'd_xG', 'd_PSxG'
      )
    )
  )

# Premier League rows whose `xG` differs from `repo_xG`, ordered by the
# absolute size of `d_xG` (largest first). The result is printed, not stored.
# NOTE(review): assumes `joined_data` already has `d_xG` / `d_PSxG` columns --
# confirm.
joined_data |>
  filter(
    # near() instead of `!=` so floating-point noise is not reported as a
    # change; rows with NA on either side are still dropped, as before.
    !near(xG, repo_xG),
    Competition_Name == 'Premier League'
  ) |>
  arrange(desc(abs(d_xG))) |>
  select(
    MatchURL, Minute, Player,
    xG, PSxG, repo_xG, repo_PSxG, d_xG, d_PSxG,
    Outcome
  )

# Premier League rows whose `PSxG` differs from `repo_PSxG`, ordered by the
# absolute size of `d_PSxG` (largest first). The result is printed, not
# stored.
# NOTE(review): assumes `joined_data` already has `d_xG` / `d_PSxG` columns --
# confirm.
joined_data |>
  filter(
    # near() instead of `!=` so floating-point noise is not reported as a
    # change; rows with NA on either side are still dropped, as before.
    !near(PSxG, repo_PSxG),
    Competition_Name == 'Premier League'
  ) |>
  arrange(desc(abs(d_PSxG))) |>
  select(
    MatchURL, Minute, Player,
    xG, PSxG, repo_xG, repo_PSxG, d_xG, d_PSxG,
    Outcome
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment