Skip to content

Instantly share code, notes, and snippets.

@tonyelhabr
Last active February 15, 2023 20:52
Show Gist options
  • Save tonyelhabr/3ad0bc9a9d3f8107c83062c8caec8b60 to your computer and use it in GitHub Desktop.
Save tonyelhabr/3ad0bc9a9d3f8107c83062c8caec8b60 to your computer and use it in GitHub Desktop.
Comparing new and old Opta xG data on FBref and Fotmob
library(worldfootballR)
library(dplyr)
library(stringr)
library(readr)
library(purrr)
library(janitor)

# https://fbref.com/en/matches/a6ff9cf9/Chelsea-Norwich-City-October-23-2021-Premier-League
# https://www.fotmob.com/match/3610012/matchfacts/chelsea-vs-norwich-city
# https://understat.com/match/16459

# "Old" shot data for each source, obtained through the `load_*()` helpers.

# FBref shooting logs: England, 1st tier, men, season ending 2022.
old_fb_shots <- load_fb_match_shooting(
  season_end_year = 2022,
  country = 'ENG',
  gender = 'M',
  tier = '1st'
)

# Fotmob match details for league id 47.
old_fotmob_shots <- load_fotmob_match_details(league_id = 47)

# Understat league shots for 'EPL'.
old_understat_shots <- load_understat_league_shots('EPL')

# Identifiers of the single example match, one per data source.
example_fotmob_match_id <- 3610012
example_fbref_match_url <- 'https://fbref.com/en/matches/a6ff9cf9/Chelsea-Norwich-City-October-23-2021-Premier-League'
example_understat_match_id <- 16459

# Restrict each "old" data set to the example match. `!!` injects the value of
# the script-level variable into the filter expression.
example_old_fotmob_match_shots <- filter(
  old_fotmob_shots,
  match_id == !!example_fotmob_match_id
)

example_old_fbref_match_shots <- filter(
  old_fb_shots,
  MatchURL == !!example_fbref_match_url
)

example_old_understat_match_shots <- as_tibble(
  filter(old_understat_shots, match_id == !!example_understat_match_id)
)

# "New" data for the same match, requested directly from each source.
example_new_fotmob_match_shots <- fotmob_get_match_details(
  example_fotmob_match_id
)

example_new_fbref_match_shots <- fb_match_shooting(example_fbref_match_url)

# Understat: build the match URL from the id, then clean the column names and
# rename `home_away` to `h_a`, the name `summarize_understat_match_xg()` reads.
example_new_understat_match_shots <-
  sprintf('https://understat.com/match/%s', example_understat_match_id) |>
  understat_match_shots() |>
  as_tibble() |>
  janitor::clean_names() |>
  rename(h_a = home_away)

#' Summarize FBref shots into per-team totals
#'
#' @param df Shot-level data with columns `Squad`, `xG`, `PSxG` and `Outcome`.
#' @return One row per `team` (the `Squad` value) with `xg` (sum of `xG`),
#'   `xgot` (sum of `PSxG`) and `g` (number of shots whose `Outcome` is
#'   `'Goal'`).
summarize_fbref_match_xg <- function(df) {
  df |>
    group_by(team = Squad) |>
    summarize(
      # `xG` / `PSxG` are coerced to numeric before summing; NAs are dropped.
      xg = sum(as.numeric(xG), na.rm = TRUE),
      xgot = sum(as.numeric(PSxG), na.rm = TRUE),
      # Drop NAs here too: otherwise a single missing `Outcome` would turn the
      # whole team's goal count into NA.
      g = sum(Outcome == 'Goal', na.rm = TRUE)
    )
}

#' Summarize Fotmob shots into per-team totals
#'
#' @param df Shot-level data with columns `team_id`, `home_team_id`,
#'   `home_team`, `away_team`, `expected_goals`, `expected_goals_on_target`,
#'   `event_type` and `is_own_goal`.
#' @return One row per `team` with `xg`, `xgot`, `g` (shots whose `event_type`
#'   is `'Goal'`) and `og` (sum of `is_own_goal`).
summarize_fotmob_match_xg <- function(df) {
  df |>
    # A shot belongs to the home side when its `team_id` equals `home_team_id`.
    group_by(team = ifelse(team_id == home_team_id, home_team, away_team)) |>
    summarize(
      xg = sum(expected_goals, na.rm = TRUE),
      xgot = sum(expected_goals_on_target, na.rm = TRUE),
      # Counts drop NAs as well, so one missing value cannot make a whole
      # team's total NA.
      g = sum(event_type == 'Goal', na.rm = TRUE),
      og = sum(is_own_goal, na.rm = TRUE)
    )
}

#' Summarize Understat shots into per-team totals
#'
#' @param df Shot-level data with columns `h_a`, `home_team`, `away_team`,
#'   `x_g` and `result`.
#' @return One row per `team` with `xg` (sum of `x_g`), `g` (shots whose
#'   `result` is `'Goal'`) and `og` (shots whose `result` is `'OwnGoal'`).
summarize_understat_match_xg <- function(df) {
  df |>
    # `h_a == 'h'` marks a home-team shot; anything else is attributed to the
    # away team.
    group_by(team = ifelse(h_a == 'h', home_team, away_team)) |>
    summarize(
      xg = sum(x_g, na.rm = TRUE),
      # Counts drop NAs as well, so one missing `result` cannot make a whole
      # team's total NA.
      g = sum(result == 'Goal', na.rm = TRUE),
      og = sum(result == 'OwnGoal', na.rm = TRUE)
    )
}

#' Convert player names to ASCII
#'
#' Passes `x` through `iconv()` with target `'ASCII//TRANSLIT'`, converting
#' from the current (native) encoding.
#'
#' @param x Character vector of player names.
#' @return Character vector of the same length as `x`.
clean_player_name <- function(x) {
  ascii_name <- iconv(x, from = '', to = 'ASCII//TRANSLIT')
  ascii_name
}

#' Reshape FBref shots into the common shot schema
#'
#' @param df Shot-level data with columns `Squad`, `Player`, `Match_Half`,
#'   `Minute`, `xG`, `PSxG`, `Outcome`, `Notes` and `Body Part`.
#' @return A tibble with columns `team`, `player`, `half`, `minute`,
#'   `minute_added`, `xg`, `xgot`, `g`, `is_penalty`, `is_free_kick`,
#'   `is_open_play`, `is_blocked`, `body_part` and `result`.
transmute_fbref_match_shots <- function(df) {
  df |>
    as_tibble() |>
    transmute(
      team = Squad,
      # Drop everything from the first " (" onwards (e.g. a "(pen)" suffix)
      # before converting the name to ASCII.
      player = clean_player_name(str_remove(Player, ' \\(.*$')),
      half = Match_Half,
      # `Minute` may look like "45+2": the part before "+" is the minute, the
      # part after it is the added time (NA when there is no "+").
      minute = as.integer(str_remove(Minute, '[+].*$')),
      minute_added = ifelse(
        str_detect(Minute, '[+]'),
        as.integer(str_remove(Minute, '^.*[+]')),
        NA_integer_
      ),
      xg = as.numeric(xG),
      xgot = as.numeric(PSxG),
      # `%in%` never returns NA, so a missing `Outcome` / `Notes` yields
      # 0 / FALSE instead of NA. This keeps `g` in line with `result`, which
      # already falls back to 'Miss' for a missing `Outcome`.
      g = as.integer(Outcome %in% 'Goal'),
      is_penalty = str_detect(Player, '\\(pen\\)'),
      is_free_kick = Notes %in% 'Free kick',
      is_open_play = !is_free_kick & !is_penalty,
      is_blocked = Outcome %in% 'Blocked',
      # "Right Foot" / "Left Foot" -> "RightFoot" / "LeftFoot"; other values
      # are kept unchanged.
      body_part = case_when(
        str_detect(`Body Part`, 'Foot') ~ str_replace(`Body Part`, ' Foot', 'Foot'),
        .default = `Body Part`
      ),
      # notes = Notes
      # Collapse outcomes into three labels: 'AttemptSaved', 'Goal', 'Miss'.
      result = case_when(
        Outcome %in% c('Saved', 'Blocked') ~ 'AttemptSaved',
        Outcome %in% 'Goal' ~ 'Goal',
        .default = 'Miss'
      )
    )
}

#' Reshape Fotmob shots into the common shot schema
#'
#' @param df Shot-level data with columns `team_id`, `home_team_id`,
#'   `home_team`, `away_team`, `player_name`, `period`, `min`, `min_added`,
#'   `expected_goals`, `expected_goals_on_target`, `event_type`, `situation`,
#'   `is_blocked` and `shot_type`.
#' @return `df` reduced to the columns `team`, `player`, `half`, `minute`,
#'   `minute_added`, `xg`, `xgot`, `g`, `is_penalty`, `is_open_play`,
#'   `is_blocked`, `body_part` and `result`.
transmute_fotmob_match_shots <- function(df) {
  transmute(
    df,
    # Away side unless the shooting team's id is the home team's id.
    team = ifelse(team_id != home_team_id, away_team, home_team),
    player = clean_player_name(player_name),
    # Every period other than 'FirstHalf' is labelled as half 2.
    half = ifelse(period != 'FirstHalf', 2L, 1L),
    minute = min,
    minute_added = min_added,
    xg = expected_goals,
    xgot = expected_goals_on_target,
    g = as.integer(event_type == 'Goal'),
    is_penalty = situation == 'Penalty',
    is_open_play = situation == 'RegularPlay',
    is_blocked,
    body_part = shot_type,
    result = event_type
  )
}

# Per-team totals for the example match, "old" vs "new" data for each source.
summarize_fbref_match_xg(example_old_fbref_match_shots)
summarize_fbref_match_xg(example_new_fbref_match_shots)
summarize_fotmob_match_xg(example_old_fotmob_match_shots)
summarize_fotmob_match_xg(example_new_fotmob_match_shots)
summarize_understat_match_xg(example_old_understat_match_shots)
summarize_understat_match_xg(example_new_understat_match_shots)

# No difference with understat, so no need to pursue further.

# Shot-level tables in the common schema, "old" vs "new" for FBref and Fotmob.
transmute_fbref_match_shots(example_old_fbref_match_shots)
transmute_fbref_match_shots(example_new_fbref_match_shots)
transmute_fotmob_match_shots(example_old_fotmob_match_shots)
transmute_fotmob_match_shots(example_new_fotmob_match_shots)

# Shot-by-shot comparison of xG / xGOT across the four data sets. Each data set
# is reduced to the join keys plus its own `<label>_xg` / `<label>_xgot`
# columns, then all four are full-joined on the keys.
# NOTE(review): the keys do not include a per-shot id, so two shots by the same
# player in the same minute would presumably match each other more than once
# in the join -- confirm against the data.
examples <- Map(
  \(shots, label) {
    labelled <- select(shots, team, player, half, minute, minute_added, xg, xgot)
    # Columns 6 and 7 are `xg` and `xgot`; prefix them with the data set label.
    names(labelled)[6:7] <- paste0(label, c('_xg', '_xgot'))
    labelled
  },
  list(
    transmute_fbref_match_shots(example_old_fbref_match_shots),
    transmute_fbref_match_shots(example_new_fbref_match_shots),
    transmute_fotmob_match_shots(example_old_fotmob_match_shots),
    transmute_fotmob_match_shots(example_new_fotmob_match_shots)
  ),
  c('old_fbref', 'new_fbref', 'old_fotmob', 'new_fotmob')
) |>
  reduce(
    full_join,
    by = join_by(team, player, half, minute, minute_added)
  ) |>
  arrange(half, minute, minute_added, team, player) |>
  mutate(
    # Old vs new within a source.
    fbref_d = round(old_fbref_xg - new_fbref_xg, 2),
    fotmob_d = round(old_fotmob_xg - new_fotmob_xg, 2),
    # FBref vs Fotmob within a vintage.
    old_d = round(old_fbref_xg - old_fotmob_xg, 2),
    new_d = round(new_fbref_xg - new_fotmob_xg, 2)
  )
examples
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment