Created
April 16, 2024 22:40
-
-
Save forrestthewoods/00e37a186c93b56de6c4069f4b4118b1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import json | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import requests | |
from matplotlib.offsetbox import AnnotationBbox, OffsetImage | |
from concurrent.futures import ThreadPoolExecutor | |
# Example FotMob API URLs:
#   https://www.fotmob.com/api/leagues?id=130&season=2023
#   https://www.fotmob.com/api/matchDetails?matchId=4386821

# Constants
ENABLE_CACHE = True  # when True, JSON already saved on disk is reused
DEBUG_SPEW = False  # when True, print every cache hit, cache write and fetch
NUM_PARALLEL_REQUESTS = 25  # matches are fetched in parallel

# api format: MLS
LEAGUE_ID_MLS, LEAGUE_NAME_MLS, SEASON_ID_MLS = 130, "MLS", "2024"

# api format: EPL
LEAGUE_ID_EPL, LEAGUE_NAME_EPL, SEASON_ID_EPL = 47, "EPL", "2023/2024"

# Config: which league to analyse
CONFIG_LEAGUE_ID, CONFIG_LEAGUE_NAME = LEAGUE_ID_MLS, LEAGUE_NAME_MLS

# Config: which season(s) to analyse. Flip the literal below to switch between
# the single-season and the multi-season configuration.
if True:
    CONFIG_SEASONS = ["2021"]
    CONFIG_SEASONS_FRIENDLY_NAME = "2021"
else:
    CONFIG_SEASONS = ["2021", "2022", "2023", "2024"]
    CONFIG_SEASONS_FRIENDLY_NAME = "2021 - 2024"

# Runtime pseudo-constants
# Directory containing this script; cache, icon and output paths hang off it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
def try_load_from_cache(filepath):
    """Return the JSON document stored at ``filepath``, or ``None``.

    ``None`` is returned when caching is switched off (``ENABLE_CACHE``) or
    when nothing exists at ``filepath``.
    """
    # Guard clause: nothing to load if caching is disabled or the file is absent.
    if not (ENABLE_CACHE and os.path.exists(filepath)):
        return None

    with open(filepath, "r") as cache_file:
        cached = json.load(cache_file)

    if DEBUG_SPEW:
        print(f"Loaded from cache: {filepath}")
    return cached
def write_to_cache(json_data, filepath):
    """Write ``json_data`` to ``filepath`` as JSON indented by four spaces.

    Any missing parent directories of ``filepath`` are created first.

    Args:
        json_data: JSON-serializable object to store.
        filepath: Destination path of the cache file.
    """
    parent_dir = os.path.dirname(filepath)
    # A bare filename has an empty directory component and os.makedirs("")
    # raises FileNotFoundError, so only create the parent when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filepath, "w") as file:
        json.dump(json_data, file, indent=4)

    if DEBUG_SPEW:
        print(f"Wrote to cache: {filepath}")
def fetch_url_json(url: str):
    """Issue an HTTP GET for ``url`` and return the response body parsed as JSON.

    Args:
        url: Address to fetch.

    Returns:
        The value produced by ``response.json()``.

    Raises:
        Exception: If the response status code is anything other than 200.
    """
    if DEBUG_SPEW:
        print(f"Fetching: {url}")

    response = requests.get(url)
    if response.status_code != 200:
        # Must be an f-string: without the prefix the message contained the
        # literal text "{url}" instead of the failing URL and response.
        raise Exception(f"Failed to fetch: [{url}]\nResponse: [{response!s}]")

    return response.json()
def fetch_match_details(match_id):
    """Download and return the match-details JSON for ``match_id``."""
    return fetch_url_json(
        f"https://www.fotmob.com/api/matchDetails?matchId={match_id}"
    )
def get_seasons_info(league_id, seasons):
    """Load the league JSON document for each entry of ``seasons``.

    Each season is read from the on-disk cache when available; otherwise it is
    downloaded and then written to the cache.

    Args:
        league_id: League identifier placed in the request URL and cache name.
        seasons: Iterable of season identifiers.

    Returns:
        A list with one season document per entry of ``seasons``, in order.
    """
    cache_dir = os.path.join(SCRIPT_DIR, "cache")
    season_docs = []

    for season in seasons:
        print(f" Loading season: {season}")

        cache_path = os.path.join(cache_dir, f"season_{league_id}_{season}.json")
        season_doc = try_load_from_cache(cache_path)

        if not season_doc:
            # cache miss (or an empty cached document): download and store it
            season_doc = fetch_url_json(
                f"https://www.fotmob.com/api/leagues?id={league_id}&season={season}"
            )
            write_to_cache(season_doc, cache_path)

        season_docs.append(season_doc)

    return season_docs
def get_matches(seasons):
    """Return the match-details JSON for every finished match in ``seasons``.

    Matches already present in the on-disk cache are loaded from there. The
    rest are downloaded in parallel (at most ``NUM_PARALLEL_REQUESTS`` at a
    time) and written to the cache.

    Args:
        seasons: Season documents; each must provide
            ``["matches"]["allMatches"]``, a list of matches carrying ``"id"``
            and ``["status"]["finished"]``.

    Returns:
        A list of match-details documents: cached matches first, followed by
        freshly downloaded ones in the order they were submitted.

    Raises:
        Exception: The first download error, re-raised only after every match
            that did download successfully has been written to the cache.
    """
    result = []
    # (match_id, cache filepath) of matches that still need downloading.
    # Deliberately not called `requests`: that name is the imported HTTP module.
    pending = []

    # iterate all matches of all seasons
    for season in seasons:
        for match in season["matches"]["allMatches"]:
            # ignore games that haven't finished
            if not match["status"]["finished"]:
                continue

            match_id = match["id"]
            filepath = os.path.join(SCRIPT_DIR, "cache", f"match_{match_id}.json")

            # check cache
            match_data = try_load_from_cache(filepath)
            if match_data:
                result.append(match_data)
            else:
                pending.append((match_id, filepath))

    first_error = None
    with ThreadPoolExecutor(max_workers=NUM_PARALLEL_REQUESTS) as executor:
        print(f" Async fetching {len(pending)} matches")

        # Submit one download per uncached match, keeping its cache path alongside.
        futures = [
            (executor.submit(fetch_match_details, match_id), filepath)
            for match_id, filepath in pending
        ]

        # Collect in submission order so the result order is deterministic.
        for future, filepath in futures:
            try:
                match_data = future.result()
            except Exception as error:
                # Keep going so the other downloads still get cached; a re-run
                # then only has to fetch the matches that failed.
                if first_error is None:
                    first_error = error
                continue

            write_to_cache(match_data, filepath)
            result.append(match_data)

    if first_error is not None:
        raise first_error

    return result
def accumulate_team_xg(matches):
    """Sum goals, xG and xGoT (for and against) per team over ``matches``.

    A match is skipped when its ``finished`` flag equals ``False``, when its
    stats are ``None``, when either team is named "Tottenham" (marked as bad
    data), or when no usable xG / xGoT entry is found.

    Args:
        matches: Iterable of match-details documents.

    Returns:
        A dict mapping team name to a dict with the keys ``goals_for``,
        ``xg``, ``xgot``, ``goals_against``, ``xg_against`` and
        ``xgot_against``.
    """
    totals = {}

    def new_team_totals():
        # fresh zeroed record for a team seen for the first time
        return { "goals_for": 0, "xg": 0.0, "xgot": 0.0, "goals_against": 0, "xg_against": 0.0, "xgot_against": 0.0 }

    for match in matches:
        general = match["general"]

        # ignore games that didn't finish (explicit comparison kept on purpose:
        # only a value equal to False skips the match)
        if general["finished"] == False:  # noqa: E712
            continue

        # some stats mysteriously missing
        stats = match["content"]["stats"]
        if stats is None:
            continue

        home_name = general["homeTeam"]["name"]
        away_name = general["awayTeam"]["name"]

        # ignore bad data
        if "Tottenham" in (home_name, away_name):
            continue

        # The xG and xGoT entries are read from the stat group at index 2.
        # NOTE(review): positional lookup - confirm the group order is stable.
        stat_group = stats["Periods"]["All"]["stats"][2]
        found = {}
        for entry in stat_group["stats"]:
            key = entry["key"]
            if key in ("expected_goals", "expected_goals_on_target") and entry["stats"][0] is not None:
                found[key] = entry["stats"]

        xg = found.get("expected_goals")
        xgot = found.get("expected_goals_on_target")

        # ignore this game if we couldn't find xg and xgot
        if xg is None or xgot is None:
            continue

        # make sure both teams have a record (home first, then away)
        home_totals = totals.setdefault(home_name, new_team_totals())
        away_totals = totals.setdefault(away_name, new_team_totals())

        # pull the per-match numbers; index 0 is the home side, 1 the away side
        header_teams = match["header"]["teams"]

        home_goals = header_teams[0]["score"]
        home_xg = float(xg[0])
        home_xgot = float(xgot[0])
        assert(header_teams[0]["name"] == home_name)

        away_goals = header_teams[1]["score"]
        away_xg = float(xg[1])
        away_xgot = float(xgot[1])
        assert(header_teams[1]["name"] == away_name)

        # each side's "for" numbers are the other side's "against" numbers
        per_side = (
            (home_totals, home_goals, home_xg, home_xgot, away_goals, away_xg, away_xgot),
            (away_totals, away_goals, away_xg, away_xgot, home_goals, home_xg, home_xgot),
        )
        for team_totals, goals_for, xg_for, xgot_for, goals_against, xg_against, xgot_against in per_side:
            team_totals["goals_for"] += goals_for
            team_totals["xg"] += xg_for
            team_totals["xgot"] += xgot_for
            team_totals["goals_against"] += goals_against
            team_totals["xg_against"] += xg_against
            team_totals["xgot_against"] += xgot_against

    return totals
def accumulate_player_xg(matches):
    """Sum xG, xGoT and goals per player over every shot in ``matches``.

    Unfinished matches and own goals are ignored. A shot whose
    ``expectedGoalsOnTarget`` is ``None`` contributes to ``xg`` only.

    Args:
        matches: Iterable of match-details documents; each must provide
            ``["general"]["finished"]`` and ``["content"]["shotmap"]["shots"]``.

    Returns:
        A dict mapping player name to ``{"xg": float, "xgot": float,
        "goals": int}``.
    """
    totals = {}

    for match in matches:
        # ignore games that didn't finish
        if not match["general"]["finished"]:
            continue

        for shot in match["content"]["shotmap"]["shots"]:
            # ignore own goals
            if shot["isOwnGoal"]:
                continue

            player_totals = totals.setdefault(
                shot["playerName"], {"xg": 0.0, "xgot": 0.0, "goals": 0}
            )

            player_totals["xg"] += shot["expectedGoals"]

            xgot = shot["expectedGoalsOnTarget"]
            if xgot is not None:
                # Add to the running xGoT total. The previous code added to the
                # running xG total instead, which overwrote xGoT with a wrong
                # value on every on-target shot.
                player_totals["xgot"] += xgot

            if shot["eventType"] == "Goal":
                player_totals["goals"] += 1

    return totals
def plot_one(x, y, icon_paths, title, x_label, y_label, ul_label, lr_label, filename):
    """Draw one scatter chart with an x == y reference line and save it.

    Args:
        x, y: Sequences of coordinates, one pair per point.
        icon_paths: One image path per point, drawn in place of a marker, or
            ``None`` for an ordinary scatter plot.
        title, x_label, y_label: Chart title and axis labels.
        ul_label: Text placed in the upper-left region (above the diagonal).
        lr_label: Text placed in the lower-right region (below the diagonal).
        filename: Output path relative to ``SCRIPT_DIR/output``.

    The image is written at 300 dpi. The figure is left open afterwards.
    """
    fig, ax = plt.subplots(figsize=(10, 5.625))

    # points: either plain markers or one small icon per point
    if icon_paths is not None:
        target_icon_width = 20
        for index, x_value in enumerate(x):
            icon = plt.imread(icon_paths[index])
            # scale so every icon ends up the same width; shape[1] is the
            # image's width in pixels
            scale = target_icon_width / icon.shape[1]
            icon_box = OffsetImage(icon, zoom=scale, interpolation="antialiased")
            ax.add_artist(
                AnnotationBbox(icon_box, (x_value, y[index]), xycoords='data', frameon=False)
            )
    else:
        ax.scatter(x, y)

    # title and axis labels
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    # pad the data range by 5 on each side, but never go below zero
    ax.set_xlim(max(0, min(x) - 5), max(x) + 5)
    ax.set_ylim(max(0, min(y) - 5), max(y) + 5)

    x_min, x_max = ax.get_xlim()
    y_min, y_max = ax.get_ylim()
    x_span = x_max - x_min
    y_span = y_max - y_min

    # dashed diagonal where x == y, drawn behind everything else
    diagonal = [np.min([x_min, y_min]), np.max([x_max, y_max])]
    ax.plot(diagonal, diagonal, 'k--', alpha=0.75, zorder=0)

    # caption for the region above the diagonal (upper-left corner)
    ax.text(x_min + 0.05 * x_span, y_max - 0.1 * y_span, ul_label, fontsize=12, ha='left', va='top')

    # caption for the region below the diagonal (lower-right corner)
    ax.text(x_max - 0.05 * x_span, y_min + 0.1 * y_span, lr_label, fontsize=12, ha='right', va='bottom')

    # write the image, creating the output folder on first use
    filepath = os.path.join(SCRIPT_DIR, "output", filename)
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
def plot_team(xg):
    """Render the six team-level charts from accumulated team totals.

    Args:
        xg: Dict mapping team name to a totals dict with the keys ``xg``,
            ``xgot``, ``goals_for``, ``xg_against``, ``xgot_against`` and
            ``goals_against``. Each team needs an icon at
            ``SCRIPT_DIR/data/icons/<team>.png``.
    """
    # shared plot data
    icon_paths = [f"{SCRIPT_DIR}/data/icons/{team}.png" for team in xg]
    title_prefix = f"{CONFIG_LEAGUE_NAME} {CONFIG_SEASONS_FRIENDLY_NAME}"
    output_dir = f"{CONFIG_SEASONS_FRIENDLY_NAME}_{CONFIG_LEAGUE_NAME}"

    # One row per chart:
    # (x key, y key, title, x label, y label, upper-left label, lower-right label, file name)
    charts = (
        ("xg", "xgot", "xG vs xGoT",
         "Expected Goals", "Expected Goals on Target",
         "Great Finishing", "Poor Finishing", "01_xg_vs_xgot.png"),
        ("xg", "goals_for", "xG vs Actual Goals",
         "Expected Goals", "Actual Goals",
         "Overperform", "Underperform", "02_xg_vs_goals.png"),
        ("xgot", "goals_for", "xGoT vs Actual Goals",
         "Expected Goals on Target", "Actual Goals",
         "Lucky", "Unlucky", "03_xgot_vs_goals.png"),
        ("xg_against", "xgot_against", "xG Against vs xGoT Against",
         "Expected Goals Against", "Expected Goals on Target Against",
         "Great Finishing (by opponent)", "Poor Finishing (by opponent)", "04_xg_against_vs_xgot_against.png"),
        ("xg_against", "goals_against", "xG Against vs Goals Against",
         "Expected Goals Against", "Actual Goals Against",
         "Opponent Overperform", "Opponent Underperform", "05_xg_against_vs_goals_against.png"),
        ("xgot_against", "goals_against", "xGoT Against vs Goals Against",
         "Expected Goals on Target Against", "Actual Goals Against",
         "Bad Keeper", "Good Keeper", "06_xgot_against_vs_goals_against.png"),
    )

    for x_key, y_key, chart_title, x_label, y_label, ul_label, lr_label, file_name in charts:
        x_values = [entry[x_key] for entry in xg.values()]
        y_values = [entry[y_key] for entry in xg.values()]
        plot_one(
            x_values,
            y_values,
            icon_paths,
            f"{title_prefix} - {chart_title}",
            x_label,
            y_label,
            ul_label,
            lr_label,
            f"{output_dir}/{file_name}",
        )
def plot_player(player_xg):
    """Render the player-level xG vs xGoT scatter chart.

    Args:
        player_xg: Dict mapping player name to a dict holding at least the
            keys ``xg`` and ``xgot``.
    """
    player_totals = list(player_xg.values())
    xg_values = [totals["xg"] for totals in player_totals]
    xgot_values = [totals["xgot"] for totals in player_totals]

    plot_one(
        xg_values,
        xgot_values,
        None,  # no icons: plain scatter markers
        "Player xG vs xGoT",
        "Expected Goals",
        "Expected Goals on Target",
        "Clinical",
        "Sloppy",
        f"{CONFIG_SEASONS_FRIENDLY_NAME}_{CONFIG_LEAGUE_NAME}/07_player_xg_vs_xgot.png",
    )
def _main():
    """Run the whole pipeline: load data, accumulate totals, plot, display."""
    # season documents for the configured league and seasons
    print("Loading season...")
    season_docs = get_seasons_info(CONFIG_LEAGUE_ID, CONFIG_SEASONS)

    # match details for every finished match in those seasons
    print("Loading matches...")
    match_docs = get_matches(season_docs)

    # per-team and per-player totals
    print("Accumulating data...")
    team_totals = accumulate_team_xg(match_docs)
    player_totals = accumulate_player_xg(match_docs)

    # Plot all the things
    print("Plotting team data...")
    plot_team(team_totals)

    print("Plotting player data...")
    plot_player(player_totals)

    plt.show()


if __name__ == "__main__":
    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment