# Looking at the biggest differences in 2024/25 xG between the values scraped
# right after each match and the values later updated to account for ball
# height and defender positioning.
library(dplyr)
library(tibble)
# Load the joined shot table. The code below relies on it having MatchURL,
# Date and Squad columns plus xG, repo_xG, PSxG and repo_PSxG.
# NOTE(review): presumably the repo_* columns are the second scrape of the
# same shots -- confirm against the script that builds this .rds file.
joined_data <- readRDS("joined_fb_match_shooting_big5_20241201.rds")

# One row per (MatchURL, Date, Squad): total each of the four columns with
# NAs ignored, take the gap between each total and its repo_* counterpart,
# and put the largest absolute xG gap first.
agg_joined_data <- joined_data |>
  group_by(MatchURL, Date, Squad) |>
  summarize(
    across(
      c(xG, repo_xG, PSxG, repo_PSxG),
      function(shot_values) sum(shot_values, na.rm = TRUE)
    )
  ) |>
  # summarize() only peels off the last grouping variable, so drop the rest.
  ungroup() |>
  mutate(d_xG = xG - repo_xG, d_PSxG = PSxG - repo_PSxG) |>
  arrange(desc(abs(d_xG)))
# big PSxG change (3rd biggest): https://youtu.be/t7p9-DfgD5M?si=arT3XBWjHvg90Gg1&t=210
# big xG change (biggest in data set): https://youtu.be/t7p9-DfgD5M?si=F5OsqmGLCb_0QBcH&t=669
# Shot-level rows for a single match (the Chelsea vs Brighton fixture named in
# the URL), trimmed to the minute, player and xG/PSxG comparison columns.
# NOTE(review): assumes joined_data already contains d_xG and d_PSxG; this
# script only derives them for the aggregated table -- confirm.
example_match <- joined_data |>
  filter(
    MatchURL == "https://fbref.com/en/matches/1714cebe/Chelsea-Brighton-and-Hove-Albion-September-28-2024-Premier-League"
  ) |>
  select(
    Minute, Player,
    xG, PSxG,
    repo_xG, repo_PSxG,
    d_xG, d_PSxG
  )
# Premier League shots whose xG differs from repo_xG, ordered by the largest
# absolute d_xG first. The result is not assigned to a name.
# Rows where the comparison evaluates to NA are dropped by filter().
joined_data |>
  filter(Competition_Name == "Premier League" & xG != repo_xG) |>
  arrange(desc(abs(d_xG))) |>
  select(
    MatchURL, Minute, Player,
    xG, PSxG,
    repo_xG, repo_PSxG,
    d_xG, d_PSxG,
    Outcome
  )
# Premier League shots whose PSxG differs from repo_PSxG, ordered by the
# largest absolute d_PSxG first. The result is not assigned to a name.
# Rows where the comparison evaluates to NA are dropped by filter().
joined_data |>
  filter(Competition_Name == "Premier League" & PSxG != repo_PSxG) |>
  arrange(desc(abs(d_PSxG))) |>
  select(
    MatchURL, Minute, Player,
    xG, PSxG,
    repo_xG, repo_PSxG,
    d_xG, d_PSxG,
    Outcome
  )