Created
April 9, 2024 05:14
-
-
Save forrestthewoods/9b870a19bb5e2e84f95c96fdd41dfa1b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import json | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pprint | |
import requests | |
from matplotlib.offsetbox import AnnotationBbox, OffsetImage | |
# Useful URLs (examples of the two FotMob endpoints this script queries)
# https://www.fotmob.com/api/leagues?id=130&season=2023
# https://www.fotmob.com/api/matchDetails?matchId=4386821

# Constants
# When True, try_load_from_cache() reads previously saved JSON from disk.
# Note that write_to_cache() does not consult this flag.
ENABLE_CACHE = True
# When True, the cache/fetch helpers print what they load, write and fetch,
# and the accumulated totals are pretty-printed before plotting.
DEBUG_SPEW = False
# League id passed as the `id` query parameter of the leagues endpoint.
LEAGUE_ID_MLS = 130
# Season passed as the `season` query parameter; also used in plot titles
# and output filenames.
SEASON_ID = 2024

# Runtime pseudo-constants
# Directory containing this script; the cache/, output/ and data/icons/
# paths are all built relative to it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
def try_load_from_cache(filepath):
    """Return the JSON document stored at ``filepath``, or ``None``.

    ``None`` is returned when caching is switched off through
    ``ENABLE_CACHE`` or when nothing exists at ``filepath``.
    """
    # Guard clause: the existence check is skipped when caching is disabled.
    cache_available = ENABLE_CACHE and os.path.exists(filepath)
    if not cache_available:
        return None

    with open(filepath, "r") as cache_file:
        cached = json.load(cache_file)

    if DEBUG_SPEW:
        print(f"Loaded from cache: {filepath}")
    return cached
def write_to_cache(json_data, filepath):
    """Write ``json_data`` to ``filepath`` as JSON indented by four spaces.

    The parent directory of ``filepath`` is created first if it is missing.
    """
    parent_dir = os.path.dirname(filepath)
    os.makedirs(parent_dir, exist_ok=True)

    with open(filepath, "w") as cache_file:
        json.dump(json_data, cache_file, indent=4)

    if DEBUG_SPEW:
        print(f"Wrote to cache: {filepath}")
def fetch_url_json(url: str):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address to request.

    Returns:
        Whatever ``response.json()`` yields for the response body.

    Raises:
        Exception: If the response status code is anything other than 200.
            The message names the URL and the response object.
    """
    if DEBUG_SPEW:
        print(f"Fetching: {url}")

    response = requests.get(url)
    if response.status_code != 200:
        # This must be an f-string; without the prefix the placeholders were
        # emitted literally and the error carried no useful information.
        raise Exception(f"Failed to fetch: [{url}]\nResponse: [{response!s}]")

    return response.json()
def fetch_match_details(match_id):
    """Fetch and return the matchDetails JSON for ``match_id``."""
    return fetch_url_json(
        f"https://www.fotmob.com/api/matchDetails?matchId={match_id}"
    )
def get_season_info(league_id, season):
    """Return the league/season JSON, preferring the on-disk cache.

    The document is cached under ``<SCRIPT_DIR>/cache``. On a cache miss it
    is fetched from the leagues endpoint and written to the cache before
    being returned.
    """
    cache_path = os.path.join(
        SCRIPT_DIR, "cache", f"season_{league_id}_{season}.json"
    )

    # A falsy cache result (missing file, disabled cache, empty document)
    # falls through to a network fetch.
    cached = try_load_from_cache(cache_path)
    if cached:
        return cached

    fetched = fetch_url_json(
        f"https://www.fotmob.com/api/leagues?id={league_id}&season={season}"
    )
    write_to_cache(fetched, cache_path)
    return fetched
def get_matches(season):
    """Return the match-details JSON for every finished match in ``season``.

    Args:
        season: Season document as returned by ``get_season_info``; the
            matches are read from ``season["matches"]["allMatches"]``.

    Returns:
        A list of match-details documents, in the order the matches appear
        in ``season``. Matches whose ``status.finished`` flag is falsy are
        skipped.

    Each match is cached in its own file under ``<SCRIPT_DIR>/cache``, so
    only matches that are not cached yet are fetched.
    """
    result = []

    for match in season["matches"]["allMatches"]:
        # ignore games that haven't finished
        if not match["status"]["finished"]:
            continue

        match_id = match["id"]
        filepath = os.path.join(SCRIPT_DIR, "cache", f"match_{match_id}.json")

        # check cache
        match_data = try_load_from_cache(filepath)
        if not match_data:
            # Cache miss: fetch through the shared helper so the endpoint
            # URL is defined in one place, then save the result.
            match_data = fetch_match_details(match_id)
            write_to_cache(match_data, filepath)

        result.append(match_data)

    return result
def _new_team_totals():
    """Return a zeroed accumulator for a team's season totals."""
    return {
        "goals_for": 0,
        "xg": 0.0,
        "xgot": 0.0,
        "goals_against": 0,
        "xg_against": 0.0,
        "xgot_against": 0.0,
    }


def accumulate_xg(matches):
    """Sum goals, xG and xGoT for and against every team in ``matches``.

    Args:
        matches: Iterable of match-details documents. Each one is read via
            ``general`` (finished flag, team names), ``header.teams`` (names
            and scores) and ``content.stats.Periods.All.stats[2]`` (the
            group searched for the xG and xGoT entries).

    Returns:
        A dict mapping team name to a dict with the keys ``goals_for``,
        ``xg``, ``xgot``, ``goals_against``, ``xg_against`` and
        ``xgot_against``.

    Raises:
        AssertionError: If the team names in ``header`` disagree with those
            in ``general``.

    Matches that are not finished, or that lack either an ``expected_goals``
    or an ``expected_goals_on_target`` value, are ignored entirely.
    """
    result = {}

    for match in matches:
        general = match["general"]

        # ignore games that didn't finish
        if not general["finished"]:
            continue

        home_team_name = general["homeTeam"]["name"]
        away_team_name = general["awayTeam"]["name"]

        # find xG and xGoT data; each holds a [home, away] pair of values
        xg_group = match["content"]["stats"]["Periods"]["All"]["stats"][2]
        xg = None
        xgot = None
        for entry in xg_group["stats"]:
            key = entry["key"]
            if key == "expected_goals" and entry["stats"][0] is not None:
                xg = entry["stats"]
            elif key == "expected_goals_on_target" and entry["stats"][0] is not None:
                xgot = entry["stats"]

        # ignore this game if we couldn't find xg and xgot
        if xg is None or xgot is None:
            continue

        # The header lists home first, then away; check it agrees with
        # ``general`` before any totals are touched.
        home_header = match["header"]["teams"][0]
        away_header = match["header"]["teams"][1]
        assert home_header["name"] == home_team_name
        assert away_header["name"] == away_team_name

        home_goals = home_header["score"]
        home_xg = float(xg[0])
        home_xgot = float(xgot[0])
        away_goals = away_header["score"]
        away_xg = float(xg[1])
        away_xgot = float(xgot[1])

        home = result.setdefault(home_team_name, _new_team_totals())
        away = result.setdefault(away_team_name, _new_team_totals())

        # accumulate home
        home["goals_for"] += home_goals
        home["xg"] += home_xg
        home["xgot"] += home_xgot
        home["goals_against"] += away_goals
        # The "against" totals accumulate onto themselves; they were
        # previously derived from the team's own xg/xgot totals.
        home["xg_against"] += away_xg
        home["xgot_against"] += away_xgot

        # accumulate away
        away["goals_for"] += away_goals
        away["xg"] += away_xg
        away["xgot"] += away_xgot
        away["goals_against"] += home_goals
        away["xg_against"] += home_xg
        away["xgot_against"] += home_xgot

    return result
def plot_one(x, y, icon_paths, title, x_label, y_label, ul_label, lr_label, filename):
    """Draw one icon scatter plot and save it under ``<SCRIPT_DIR>/output``.

    Point ``i`` is drawn at ``(x[i], y[i])`` using the image at
    ``icon_paths[i]``. A dashed ``x == y`` diagonal is added, with
    ``ul_label`` placed near the upper-left corner and ``lr_label`` near the
    lower-right corner. The figure is saved as ``filename`` at 300 dpi.
    """
    fig, ax = plt.subplots(figsize=(10, 5.625))

    # Scatter plot with icons: each image is scaled so its width maps to the
    # same target value regardless of the source resolution.
    target_width = 20
    for idx, x_val in enumerate(x):
        icon = plt.imread(icon_paths[idx])
        scale = target_width / icon.shape[1]
        marker = OffsetImage(icon, zoom=scale, interpolation="antialiased")
        ax.add_artist(
            AnnotationBbox(marker, (x_val, y[idx]), xycoords='data', frameon=False)
        )

    # Title and axis labels.
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    # Pad the data range by 5 on each side, never letting the lower bound
    # drop below zero.
    x_floor = max(0, min(x) - 5)
    x_ceiling = max(x) + 5
    ax.set_xlim(x_floor, x_ceiling)
    y_floor = max(0, min(y) - 5)
    y_ceiling = max(y) + 5
    ax.set_ylim(y_floor, y_ceiling)

    # Dashed x == y reference line spanning both axes.
    x_min, x_max = ax.get_xlim()
    y_min, y_max = ax.get_ylim()
    diagonal = [np.min([x_min, y_min]), np.max([x_max, y_max])]
    ax.plot(diagonal, diagonal, 'k--', alpha=0.75, zorder=0)

    x_span = x_max - x_min
    y_span = y_max - y_min

    # Upper-left region label.
    ax.text(
        x_min + 0.05 * x_span,
        y_max - 0.1 * y_span,
        ul_label,
        fontsize=12,
        ha='left',
        va='top',
    )

    # Lower-right region label.
    ax.text(
        x_max - 0.05 * x_span,
        y_min + 0.1 * y_span,
        lr_label,
        fontsize=12,
        ha='right',
        va='bottom',
    )

    # Create the output directory if needed, then save.
    out_path = os.path.join(SCRIPT_DIR, "output", filename)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path, dpi=300, bbox_inches='tight')
if __name__ == "__main__":
    # Load the season, its finished matches, and the per-team totals.
    print("Loading season...")
    season = get_season_info(LEAGUE_ID_MLS, SEASON_ID)

    print("Loading matches...")
    matches = get_matches(season)

    print("Accumulating data...")
    xg = accumulate_xg(matches)
    if DEBUG_SPEW:
        pprint.pprint(xg)

    # Shared plot data: one icon per team, in the dict's iteration order.
    icon_paths = [f"{SCRIPT_DIR}/data/icons/{team}.png" for team in xg]
    team_totals = list(xg.values())

    # One row per chart:
    # (x key, y key, title, x label, y label,
    #  upper-left label, lower-right label, output filename)
    chart_specs = [
        # xG vs xGoT
        ("xg", "xgot",
         f"MLS {SEASON_ID} - xG vs xGoT",
         "Expected Goals", "Expected Goals on Target",
         "Great Finishing", "Poor Finishing",
         f"01_mls_{SEASON_ID}_xg_vs_xgot.png"),
        # xG vs goals
        ("xg", "goals_for",
         f"MLS {SEASON_ID} - xG vs Actual Goals",
         "Expected Goals", "Actual Goals",
         "Overperform", "Underperform",
         f"02_mls_{SEASON_ID}_xg_vs_goals.png"),
        # xGoT vs goals
        ("xgot", "goals_for",
         f"MLS {SEASON_ID} - xGoT vs Actual Goals",
         "Expected Goals on Target", "Actual Goals",
         "Lucky", "Unlucky",
         f"03_mls_{SEASON_ID}_xgot_vs_goals.png"),
        # xG_against vs xGoT_against
        ("xg_against", "xgot_against",
         f"MLS {SEASON_ID} - xG Against vs xGoT Against",
         "Expected Goals Against", "Expected Goals on Target Against",
         "Great Finishing (by opponent)", "Poor Finishing (by opponent)",
         f"04_mls_{SEASON_ID}_xg_against_vs_xgot_against.png"),
        # xG_against vs goals_against
        ("xg_against", "goals_against",
         f"MLS {SEASON_ID} - xG Against vs Goals Against",
         "Expected Goals", "Actual Goals",
         "Opponent Overperform", "Opponent Underperform",
         f"05_mls_{SEASON_ID}_xg_against_vs_goals_against.png"),
        # xGoT_against vs goals_against
        ("xgot_against", "goals_against",
         f"MLS {SEASON_ID} - xGoT Against vs Goals Against",
         "Expected Goals on Target", "Actual Goals",
         "Bad Keeper", "Good Keeper",
         f"06_mls_{SEASON_ID}_xgot_against_vs_goals_against.png"),
    ]

    for x_key, y_key, title, x_label, y_label, ul_label, lr_label, filename in chart_specs:
        x = [totals[x_key] for totals in team_totals]
        y = [totals[y_key] for totals in team_totals]
        plot_one(x, y, icon_paths, title, x_label, y_label, ul_label, lr_label, filename)

    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment