library(worldfootballR)
library(dplyr)
library(stringr)
library(readr)
library(purrr)
library(janitor)
# https://fbref.com/en/matches/a6ff9cf9/Chelsea-Norwich-City-October-23-2021-Premier-League
# https://www.fotmob.com/match/3610012/matchfacts/chelsea-vs-norwich-city
# https://understat.com/match/16459
# "Old" copies of the shot data, read through worldfootballR's `load_*()`
# helpers. These are compared further down against freshly fetched data for
# a single example match.
old_fb_shots <- load_fb_match_shooting(
  country = "ENG",
  gender = "M",
  tier = "1st",
  season_end_year = 2022
)

# NOTE(review): league_id 47 is presumably the Premier League, to line up with
# the FBref and Understat loads -- confirm.
old_fotmob_shots <- load_fotmob_match_details(league_id = 47)

old_understat_shots <- load_understat_league_shots("EPL")
# Identifiers of the example match in each source (they correspond to the
# URLs listed in the comments at the top of the file).
example_fotmob_match_id <- 3610012
example_fbref_match_url <- "https://fbref.com/en/matches/a6ff9cf9/Chelsea-Norwich-City-October-23-2021-Premier-League"
example_understat_match_id <- 16459

# Subset each "old" data set down to the example match. `!!` injects the
# value of the id/URL variable into the filter expression.
example_old_fotmob_match_shots <- filter(
  old_fotmob_shots,
  match_id == !!example_fotmob_match_id
)

example_old_fbref_match_shots <- filter(
  old_fb_shots,
  MatchURL == !!example_fbref_match_url
)

example_old_understat_match_shots <- as_tibble(
  filter(old_understat_shots, match_id == !!example_understat_match_id)
)
# "New" copies of the same match, obtained through worldfootballR's per-match
# functions instead of the `load_*()` helpers.
example_new_fotmob_match_shots <- fotmob_get_match_details(
  example_fotmob_match_id
)

example_new_fbref_match_shots <- fb_match_shooting(example_fbref_match_url)

# The Understat result is cleaned to snake_case names and `home_away` is
# renamed to `h_a`, the column name `summarize_understat_match_xg()` reads.
example_new_understat_match_shots <- sprintf(
  "https://understat.com/match/%s",
  example_understat_match_id
) |>
  understat_match_shots() |>
  as_tibble() |>
  janitor::clean_names() |>
  rename(h_a = home_away)
# Per-team totals for FBref shot rows.
#
# `df` must have `Squad`, `xG`, `PSxG` and `Outcome` columns. Returns one row
# per squad (`team`) with:
#   xg   - sum of `xG` (coerced to numeric, NAs ignored)
#   xgot - sum of `PSxG` (coerced to numeric, NAs ignored)
#   g    - number of rows whose `Outcome` is "Goal"
summarize_fbref_match_xg <- function(df) {
  by_team <- group_by(df, team = Squad)

  summarise(
    by_team,
    xg = sum(as.numeric(xG), na.rm = TRUE),
    xgot = sum(as.numeric(PSxG), na.rm = TRUE),
    g = sum(Outcome == "Goal")
  )
}
# Per-team totals for FotMob shot rows.
#
# The team name is taken from `home_team` when the shot's `team_id` equals
# `home_team_id`, otherwise from `away_team`. Returns one row per team with:
#   xg   - sum of `expected_goals` (NAs ignored)
#   xgot - sum of `expected_goals_on_target` (NAs ignored)
#   g    - number of rows whose `event_type` is "Goal"
#   og   - sum of `is_own_goal`
summarize_fotmob_match_xg <- function(df) {
  by_team <- group_by(
    df,
    team = ifelse(team_id == home_team_id, home_team, away_team)
  )

  summarise(
    by_team,
    xg = sum(expected_goals, na.rm = TRUE),
    xgot = sum(expected_goals_on_target, na.rm = TRUE),
    g = sum(event_type == "Goal"),
    og = sum(is_own_goal)
  )
}
# Per-team totals for Understat shot rows.
#
# The team name is `home_team` when `h_a` is "h", otherwise `away_team`.
# Returns one row per team with:
#   xg - sum of `x_g` (NAs ignored)
#   g  - number of rows whose `result` is "Goal"
#   og - number of rows whose `result` is "OwnGoal"
summarize_understat_match_xg <- function(df) {
  by_team <- group_by(
    df,
    team = ifelse(h_a == "h", home_team, away_team)
  )

  summarise(
    by_team,
    xg = sum(x_g, na.rm = TRUE),
    g = sum(result == "Goal"),
    og = sum(result == "OwnGoal")
  )
}
# Transliterate player names to plain ASCII so that names carrying accents in
# one source can match their unaccented spelling in another.
#
# `x` is a character vector; the result has the same length. The input
# encoding is left at `iconv()`'s default (`from = ""`).
clean_player_name <- function(x) {
  iconv(x, from = "", to = "ASCII//TRANSLIT")
}
# Reshape FBref shot rows into the column layout used for the cross-source
# comparison.
#
# `df` must have `Squad`, `Player`, `Match_Half`, `Minute`, `xG`, `PSxG`,
# `Outcome`, `Notes` and `Body Part` columns. Returns a tibble with one row
# per input row and only the columns built below.
transmute_fbref_match_shots <- function(df) {
  shots <- as_tibble(df)

  transmute(
    shots,
    team = Squad,
    # Drop any trailing " (...)" suffix from the player name, e.g. " (pen)".
    player = clean_player_name(str_replace(Player, " \\(.*$", "")),
    half = Match_Half,
    # `Minute` may look like "45+2": the part before "+" is the minute and the
    # part after it is the added time (NA when there is no "+").
    minute = as.integer(str_replace(Minute, "[+].*$", "")),
    minute_added = ifelse(
      str_detect(Minute, "[+]"),
      as.integer(str_replace(Minute, "^.*[+]", "")),
      NA_integer_
    ),
    xg = as.numeric(xG),
    xgot = as.numeric(PSxG),
    g = as.integer(Outcome == "Goal"),
    is_penalty = str_detect(Player, "\\(pen\\)"),
    # NOTE(review): exact match only -- a `Notes` value that combines
    # "Free kick" with other text would not be flagged. Confirm against data.
    is_free_kick = Notes == "Free kick",
    is_open_play = !(Notes == "Free kick" | is_penalty),
    is_blocked = Outcome == "Blocked",
    # "Right Foot" / "Left Foot" become "RightFoot" / "LeftFoot"; any other
    # value is passed through unchanged.
    body_part = case_when(
      str_detect(`Body Part`, "Foot") ~
        str_replace(`Body Part`, " Foot", "Foot"),
      .default = `Body Part`
    ),
    # notes = Notes
    # Collapse outcomes to three labels: saved/blocked, goal, everything else.
    result = case_when(
      Outcome %in% c("Saved", "Blocked") ~ "AttemptSaved",
      Outcome == "Goal" ~ "Goal",
      .default = "Miss"
    )
  )
}
# Reshape FotMob shot rows into the column layout used for the cross-source
# comparison.
#
# Returns only the columns built below, one row per input row.
transmute_fotmob_match_shots <- function(df) {
  transmute(
    df,
    # Shots by the home side get `home_team`; all others get `away_team`.
    team = ifelse(team_id == home_team_id, home_team, away_team),
    player = clean_player_name(player_name),
    # Any period other than "FirstHalf" is labelled as half 2.
    half = ifelse(period == "FirstHalf", 1L, 2L),
    minute = min,
    minute_added = min_added,
    xg = expected_goals,
    xgot = expected_goals_on_target,
    g = as.integer(event_type == "Goal"),
    is_penalty = situation == "Penalty",
    is_open_play = situation == "RegularPlay",
    is_blocked = is_blocked,
    body_part = shot_type,
    result = event_type
  )
}
# Print per-team totals for the old and new copy of each source, one pair
# after another, so they can be compared by eye.
summarize_fbref_match_xg(example_old_fbref_match_shots)
summarize_fbref_match_xg(example_new_fbref_match_shots)

summarize_fotmob_match_xg(example_old_fotmob_match_shots)
summarize_fotmob_match_xg(example_new_fotmob_match_shots)

summarize_understat_match_xg(example_old_understat_match_shots)
summarize_understat_match_xg(example_new_understat_match_shots)
# No difference with understat, so no need to pursue further.
# Print the shot-level tables for the old and new copies of the FBref and
# FotMob data.
transmute_fbref_match_shots(example_old_fbref_match_shots)
transmute_fbref_match_shots(example_new_fbref_match_shots)

transmute_fotmob_match_shots(example_old_fotmob_match_shots)
transmute_fotmob_match_shots(example_new_fotmob_match_shots)
# Keep the shot-matching keys plus the xG columns, with `xg` / `xgot` renamed
# to `<prefix>_xg` / `<prefix>_xgot`.
#
# `shot_idx` numbers, in row order, the shots that share the same
# (team, player, half, minute, minute_added) key -- e.g. two attempts by one
# player in the same minute. Joining on it pairs such shots one-to-one;
# without it the joins below would form every combination of the duplicated
# rows (a many-to-many join).
# NOTE(review): this assumes each source lists same-key shots in the same
# order -- confirm if such rows look mismatched.
select_keyed_xg <- function(shots, prefix) {
  shots |>
    mutate(
      shot_idx = row_number(),
      .by = c(team, player, half, minute, minute_added)
    ) |>
    select(team, player, half, minute, minute_added, shot_idx, xg, xgot) |>
    rename_with(\(nm) paste0(prefix, "_", nm), .cols = c(xg, xgot))
}

# One row per shot, with old/new FBref and FotMob xG side by side and the
# rounded differences between them:
#   fbref_d  - old FBref minus new FBref
#   fotmob_d - old FotMob minus new FotMob
#   old_d    - old FBref minus old FotMob
#   new_d    - new FBref minus new FotMob
examples <- list(
  example_old_fbref_match_shots |>
    transmute_fbref_match_shots() |>
    select_keyed_xg("old_fbref"),
  example_new_fbref_match_shots |>
    transmute_fbref_match_shots() |>
    select_keyed_xg("new_fbref"),
  example_old_fotmob_match_shots |>
    transmute_fotmob_match_shots() |>
    select_keyed_xg("old_fotmob"),
  example_new_fotmob_match_shots |>
    transmute_fotmob_match_shots() |>
    select_keyed_xg("new_fotmob")
) |>
  reduce(
    full_join,
    by = join_by(team, player, half, minute, minute_added, shot_idx)
  ) |>
  arrange(half, minute, minute_added, team, player, shot_idx) |>
  # `shot_idx` only exists to keep the joins one-to-one; drop it so the
  # result has the same columns as before.
  select(-shot_idx) |>
  mutate(
    fbref_d = round(old_fbref_xg - new_fbref_xg, 2),
    fotmob_d = round(old_fotmob_xg - new_fotmob_xg, 2),
    old_d = round(old_fbref_xg - old_fotmob_xg, 2),
    new_d = round(new_fbref_xg - new_fotmob_xg, 2)
  )

examples