Skip to content

Instantly share code, notes, and snippets.

@forrestthewoods
Created April 16, 2024 22:40
Show Gist options
  • Save forrestthewoods/00e37a186c93b56de6c4069f4b4118b1 to your computer and use it in GitHub Desktop.
Save forrestthewoods/00e37a186c93b56de6c4069f4b4118b1 to your computer and use it in GitHub Desktop.
import os
import json
import matplotlib.pyplot as plt
import numpy as np
import requests
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
from concurrent.futures import ThreadPoolExecutor
# Example URLs
# https://www.fotmob.com/api/leagues?id=130&season=2023
# https://www.fotmob.com/api/matchDetails?matchId=4386821

# --- Behaviour switches ---------------------------------------------------
ENABLE_CACHE = True          # when False, try_load_from_cache always misses
DEBUG_SPEW = False           # when True, cache and fetch helpers print a trace
NUM_PARALLEL_REQUESTS = 25   # matches are fetched in parallel

# --- API identifiers: MLS -------------------------------------------------
LEAGUE_ID_MLS = 130
LEAGUE_NAME_MLS = "MLS"
SEASON_ID_MLS = "2024"

# --- API identifiers: EPL -------------------------------------------------
LEAGUE_ID_EPL = 47
LEAGUE_NAME_EPL = "EPL"
SEASON_ID_EPL = "2023/2024"

# --- Active configuration -------------------------------------------------
CONFIG_LEAGUE_ID = LEAGUE_ID_MLS
CONFIG_LEAGUE_NAME = LEAGUE_NAME_MLS

# Each preset pairs the season ids to load with the label used in chart
# titles and output folder names. Only the "single" preset is active.
_SEASON_PRESETS = {
    "single": (["2021"], "2021"),
    "multi": (["2021", "2022", "2023", "2024"], "2021 - 2024"),
}
CONFIG_SEASONS, CONFIG_SEASONS_FRIENDLY_NAME = _SEASON_PRESETS["single"]

# --- Runtime pseudo-constants ---------------------------------------------
# Directory containing this script; cache and output paths are built from it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
def try_load_from_cache(filepath):
    """Return the JSON document stored at *filepath*, or None on a miss.

    A miss happens when ENABLE_CACHE is off or when nothing exists at
    *filepath*. On a hit the file is parsed with ``json.load`` and, if
    DEBUG_SPEW is set, a trace line is printed.
    """
    cache_hit = ENABLE_CACHE and os.path.exists(filepath)
    if not cache_hit:
        return None

    with open(filepath, "r") as cache_file:
        cached = json.load(cache_file)
    if DEBUG_SPEW:
        print(f"Loaded from cache: {filepath}")
    return cached
def write_to_cache(json_data, filepath):
    """Write *json_data* to *filepath* as JSON indented by four spaces.

    The parent directory of *filepath* is created first if it is missing.
    A trace line is printed afterwards when DEBUG_SPEW is set.

    Args:
        json_data: Any object ``json.dump`` can serialize.
        filepath: Destination path; an existing file is overwritten.
    """
    parent_dir = os.path.dirname(filepath)
    # os.makedirs("") raises FileNotFoundError, so a bare filename (no
    # directory component) must skip directory creation entirely.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filepath, "w") as file:
        json.dump(json_data, file, indent=4)
    if DEBUG_SPEW:
        print(f"Wrote to cache: {filepath}")
def fetch_url_json(url: str, timeout: float = 30.0):
    """GET *url* and return the decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block a worker thread indefinitely.

    Returns:
        Whatever ``response.json()`` produces for the response body.

    Raises:
        Exception: If the response status code is anything other than 200.
            The message names the URL and the response object.
    """
    if DEBUG_SPEW:
        print(f"Fetching: {url}")
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # The f prefix is required; without it the placeholders were
        # reported verbatim instead of the actual url/response.
        raise Exception(f"Failed to fetch: [{url}]\nResponse: [{response!s}]")
    return response.json()
def fetch_match_details(match_id):
    """Fetch and return the matchDetails JSON for *match_id*."""
    return fetch_url_json(
        f"https://www.fotmob.com/api/matchDetails?matchId={match_id}"
    )
def get_seasons_info(league_id, seasons):
    """Return one league JSON document per entry of *seasons*, in order.

    Each season is looked up in the on-disk cache first (under
    ``SCRIPT_DIR/cache``). On a miss, or when the cached document is
    falsy, it is fetched from the leagues API and then written to the cache.
    """
    loaded = []
    for season in seasons:
        print(f" Loading season: {season}")

        cache_path = os.path.join(
            SCRIPT_DIR, "cache", f"season_{league_id}_{season}.json"
        )

        season_data = try_load_from_cache(cache_path)
        if not season_data:
            # nothing usable cached: download, then persist for next run
            season_data = fetch_url_json(
                f"https://www.fotmob.com/api/leagues?id={league_id}&season={season}"
            )
            write_to_cache(season_data, cache_path)

        loaded.append(season_data)
    return loaded
def get_matches(seasons):
    """Return match-detail JSON for every finished match in *seasons*.

    Matches already in the on-disk cache are returned first, in season
    order. The remaining ones are downloaded on a thread pool of
    NUM_PARALLEL_REQUESTS workers, written to the cache, and appended in
    the order they were queued.
    """
    loaded = []
    pending = []  # (url, cache path) pairs that still need downloading

    for season in seasons:
        for match in season["matches"]["allMatches"]:
            match_id = match["id"]

            # skip games that haven't finished (only an explicit false flag)
            if match["status"]["finished"] == False:  # noqa: E712
                continue

            cache_path = os.path.join(
                SCRIPT_DIR, "cache", f"match_{match_id}.json"
            )

            # check cache
            cached = try_load_from_cache(cache_path)
            if cached:
                loaded.append(cached)
                continue

            # not cached: queue a download
            pending.append(
                (f"https://www.fotmob.com/api/matchDetails?matchId={match_id}", cache_path)
            )

    def fetch_with_path(url, filepath):
        # carry the cache path along so the result can be stored later
        return (fetch_url_json(url), filepath)

    with ThreadPoolExecutor(max_workers=NUM_PARALLEL_REQUESTS) as executor:
        print(f" Async fetching {len(pending)} matches")

        # submit everything up front so downloads overlap
        futures = []
        for url, cache_path in pending:
            futures.append(executor.submit(fetch_with_path, url, cache_path))

        # collect in submission order; result() re-raises a failed fetch
        for future in futures:
            match_data, cache_path = future.result()
            write_to_cache(match_data, cache_path)
            loaded.append(match_data)

    return loaded
def accumulate_team_xg(matches):
    """Sum goals, xG and xGoT for and against each team over *matches*.

    A match is skipped when it is flagged unfinished, when its stats block
    is None, when either side is named "Tottenham", or when the xG / xGoT
    rows cannot be found with a non-None first value.

    Returns:
        dict mapping team name to a dict with the keys ``goals_for``,
        ``xg``, ``xgot``, ``goals_against``, ``xg_against`` and
        ``xgot_against``.
    """
    totals = {}

    def add_side(team, goals_for, xg_for, xgot_for, goals_against, xg_against, xgot_against):
        # fold one team's view of a single match into its running totals
        entry = totals[team]
        entry["goals_for"] += goals_for
        entry["xg"] += xg_for
        entry["xgot"] += xgot_for
        entry["goals_against"] += goals_against
        entry["xg_against"] += xg_against
        entry["xgot_against"] += xgot_against

    for match in matches:
        general = match["general"]

        # ignore games that didn't finish (only an explicit false flag)
        if general["finished"] == False:  # noqa: E712
            continue

        # some matches come without any stats at all
        stats = match["content"]["stats"]
        if stats is None:
            continue

        home_name = general["homeTeam"]["name"]
        away_name = general["awayTeam"]["name"]

        # ignore bad data
        if "Tottenham" in (home_name, away_name):
            continue

        # the xG rows are read from the third stats group of the "All" period
        xg_pair = None
        xgot_pair = None
        for row in stats["Periods"]["All"]["stats"][2]["stats"]:
            row_key = row["key"]
            if row_key == "expected_goals":
                if row["stats"][0] is not None:
                    xg_pair = row["stats"]
            elif row_key == "expected_goals_on_target":
                if row["stats"][0] is not None:
                    xgot_pair = row["stats"]

        # both figures are required to use this game
        if xg_pair is None or xgot_pair is None:
            continue

        # make sure both teams have an entry before accumulating
        for name in (home_name, away_name):
            if name not in totals:
                totals[name] = {
                    "goals_for": 0, "xg": 0.0, "xgot": 0.0,
                    "goals_against": 0, "xg_against": 0.0, "xgot_against": 0.0,
                }

        # header teams are expected as [home, away]; the asserts check that
        header_teams = match["header"]["teams"]
        home_goals = header_teams[0]["score"]
        home_xg, home_xgot = float(xg_pair[0]), float(xgot_pair[0])
        assert header_teams[0]["name"] == home_name
        away_goals = header_teams[1]["score"]
        away_xg, away_xgot = float(xg_pair[1]), float(xgot_pair[1])
        assert header_teams[1]["name"] == away_name

        add_side(home_name, home_goals, home_xg, home_xgot, away_goals, away_xg, away_xgot)
        add_side(away_name, away_goals, away_xg, away_xgot, home_goals, home_xg, home_xgot)

    return totals
def accumulate_player_xg(matches):
    """Sum xG, xGoT and goals per player over every shot in *matches*.

    Matches not flagged as finished are skipped, as are own goals. A shot
    whose ``expectedGoalsOnTarget`` is None contributes to xG but not to
    xGoT. A shot counts as a goal when its ``eventType`` is "Goal".

    Args:
        matches: Iterable of match-detail dicts, each providing
            ``general.finished`` and ``content.shotmap.shots``.

    Returns:
        dict mapping player name to ``{"xg": float, "xgot": float,
        "goals": int}``.
    """
    result = {}
    for match in matches:
        # ignore games that didn't finish
        if not match["general"]["finished"]:
            continue

        for shot in match["content"]["shotmap"]["shots"]:
            player = shot["playerName"]

            # ignore own goals
            if shot["isOwnGoal"]:
                continue

            totals = result.setdefault(player, {"xg": 0.0, "xgot": 0.0, "goals": 0})
            totals["xg"] += shot["expectedGoals"]

            xgot = shot["expectedGoalsOnTarget"]
            if xgot is not None:
                # Add to the running xGoT total; the previous code added to
                # the running xG total, overwriting xGoT with a wrong value.
                totals["xgot"] += xgot

            if shot["eventType"] == "Goal":
                totals["goals"] += 1
    return result
def plot_one(x, y, icon_paths, title, x_label, y_label, ul_label, lr_label, filename):
    """Draw one scatter chart with an x == y guide line and save it as an image.

    Points are drawn as icons read from *icon_paths* (indexed in step with
    *x* and *y*), or as plain scatter markers when *icon_paths* is None.
    *ul_label* and *lr_label* are written in the upper-left and lower-right
    corners. The image is saved to ``SCRIPT_DIR/output/<filename>``; the
    figure itself is left open.
    """
    _fig, ax = plt.subplots(figsize=(10, 5.625))

    # points: icons when available, plain markers otherwise
    if icon_paths is not None:
        icon_width = 20
        for idx, x_value in enumerate(x):
            image = plt.imread(icon_paths[idx])
            # scale by the image's pixel width so icons end up the same size
            zoom = icon_width / image.shape[1]
            icon = OffsetImage(image, zoom=zoom, interpolation="antialiased")
            ax.add_artist(
                AnnotationBbox(icon, (x_value, y[idx]), xycoords='data', frameon=False)
            )
    else:
        ax.scatter(x, y)

    # labels
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    # pad the data range by 5 on each side, but never start below 0
    ax.set_xlim(max(0, min(x) - 5), max(x) + 5)
    ax.set_ylim(max(0, min(y) - 5), max(y) + 5)

    # dotted line at x==y, spanning the wider of the two axis ranges
    x_min, x_max = ax.get_xlim()
    y_min, y_max = ax.get_ylim()
    diagonal = [np.min([x_min, y_min]), np.max([x_max, y_max])]
    ax.plot(diagonal, diagonal, 'k--', alpha=0.75, zorder=0)

    # corner captions, inset 5% horizontally and 10% vertically
    x_span = x_max - x_min
    y_span = y_max - y_min
    ax.text(x_min + 0.05 * x_span, y_max - 0.1 * y_span, ul_label,
            fontsize=12, ha='left', va='top')
    ax.text(x_max - 0.05 * x_span, y_min + 0.1 * y_span, lr_label,
            fontsize=12, ha='right', va='bottom')

    # filename may contain a sub-folder, so create its directory first
    out_path = os.path.join(SCRIPT_DIR, "output", filename)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path, dpi=300, bbox_inches='tight')
def plot_team(xg):
    """Render the six team charts from the per-team totals in *xg*.

    *xg* maps team name to the totals dict produced by accumulate_team_xg.
    Each chart plots one totals key against another, using the icon at
    ``SCRIPT_DIR/data/icons/<team>.png`` for every team.
    """
    # shared plot data
    icon_paths = [f"{SCRIPT_DIR}/data/icons/{team}.png" for team in xg]
    entries = list(xg.values())

    # (x key, y key, title, x label, y label, upper-left, lower-right, file)
    charts = [
        ("xg", "xgot",
         f"{CONFIG_LEAGUE_NAME} {CONFIG_SEASONS_FRIENDLY_NAME} - xG vs xGoT",
         "Expected Goals", "Expected Goals on Target",
         "Great Finishing", "Poor Finishing",
         f"{CONFIG_SEASONS_FRIENDLY_NAME}_{CONFIG_LEAGUE_NAME}/01_xg_vs_xgot.png"),
        ("xg", "goals_for",
         f"{CONFIG_LEAGUE_NAME} {CONFIG_SEASONS_FRIENDLY_NAME} - xG vs Actual Goals",
         "Expected Goals", "Actual Goals",
         "Overperform", "Underperform",
         f"{CONFIG_SEASONS_FRIENDLY_NAME}_{CONFIG_LEAGUE_NAME}/02_xg_vs_goals.png"),
        ("xgot", "goals_for",
         f"{CONFIG_LEAGUE_NAME} {CONFIG_SEASONS_FRIENDLY_NAME} - xGoT vs Actual Goals",
         "Expected Goals on Target", "Actual Goals",
         "Lucky", "Unlucky",
         f"{CONFIG_SEASONS_FRIENDLY_NAME}_{CONFIG_LEAGUE_NAME}/03_xgot_vs_goals.png"),
        ("xg_against", "xgot_against",
         f"{CONFIG_LEAGUE_NAME} {CONFIG_SEASONS_FRIENDLY_NAME} - xG Against vs xGoT Against",
         "Expected Goals Against", "Expected Goals on Target Against",
         "Great Finishing (by opponent)", "Poor Finishing (by opponent)",
         f"{CONFIG_SEASONS_FRIENDLY_NAME}_{CONFIG_LEAGUE_NAME}/04_xg_against_vs_xgot_against.png"),
        ("xg_against", "goals_against",
         f"{CONFIG_LEAGUE_NAME} {CONFIG_SEASONS_FRIENDLY_NAME} - xG Against vs Goals Against",
         "Expected Goals Against", "Actual Goals Against",
         "Opponent Overperform", "Opponent Underperform",
         f"{CONFIG_SEASONS_FRIENDLY_NAME}_{CONFIG_LEAGUE_NAME}/05_xg_against_vs_goals_against.png"),
        ("xgot_against", "goals_against",
         f"{CONFIG_LEAGUE_NAME} {CONFIG_SEASONS_FRIENDLY_NAME} - xGoT Against vs Goals Against",
         "Expected Goals on Target Against", "Actual Goals Against",
         "Bad Keeper", "Good Keeper",
         f"{CONFIG_SEASONS_FRIENDLY_NAME}_{CONFIG_LEAGUE_NAME}/06_xgot_against_vs_goals_against.png"),
    ]

    for x_key, y_key, title, x_label, y_label, ul_label, lr_label, out_name in charts:
        x = [entry[x_key] for entry in entries]
        y = [entry[y_key] for entry in entries]
        plot_one(x, y, icon_paths, title, x_label, y_label, ul_label, lr_label, out_name)
def plot_player(player_xg):
    """Render the player xG vs xGoT chart with plain scatter markers.

    *player_xg* maps player name to a dict holding at least "xg" and "xgot".
    """
    totals = list(player_xg.values())
    x = [entry["xg"] for entry in totals]
    y = [entry["xgot"] for entry in totals]
    plot_one(
        x,
        y,
        None,
        "Player xG vs xGoT",
        "Expected Goals",
        "Expected Goals on Target",
        "Clinical",
        "Sloppy",
        f"{CONFIG_SEASONS_FRIENDLY_NAME}_{CONFIG_LEAGUE_NAME}/07_player_xg_vs_xgot.png",
    )
def _run_pipeline():
    """Load seasons and matches, aggregate xG, render all charts, then show them."""
    # season info
    print("Loading season...")
    seasons = get_seasons_info(CONFIG_LEAGUE_ID, CONFIG_SEASONS)

    # match details for every finished match of those seasons
    print("Loading matches...")
    matches = get_matches(seasons)

    # team and player aggregates
    print("Accumulating data...")
    team_xg = accumulate_team_xg(matches)
    player_xg = accumulate_player_xg(matches)

    # plot all the things
    print("Plotting team data...")
    plot_team(team_xg)
    print("Plotting player data...")
    plot_player(player_xg)

    # display the figures that the plot helpers left open
    plt.show()


if __name__ == "__main__":
    _run_pipeline()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment