Created
October 17, 2018 16:10
-
-
Save xgFootball/ffade0115709593b111d000c08a73629 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import pandas as pd | |
import time | |
import urllib3 | |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | |
def plainRequest(url, timeout=30):
    """Fetch ``url`` with an unauthenticated GET and return the decoded JSON.

    Args:
        url: Full URL of the endpoint to query.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the response status is 4xx/5xx.
    """
    # verify=False skips TLS certificate validation, exactly as before.
    # NOTE(review): confirm the API still needs this; it disables a safety check.
    r = requests.get(url, verify=False, timeout=timeout)
    # Fail loudly on error statuses instead of passing an error payload on
    # to callers as if it were match data.
    r.raise_for_status()
    return r.json()
def authenticatedRequest(url, token, timeout=30):
    """Fetch ``url`` with a bearer token and return the decoded JSON.

    Args:
        url: Full URL of the endpoint to query.
        token: Mapping holding the access token under the ``'accessToken'``
            key (the value returned by ``getToken``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The parsed JSON body of the response.

    Raises:
        KeyError: if ``token`` has no ``'accessToken'`` entry.
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the response status is 4xx/5xx.
    """
    headers = {'Authorization': 'Bearer ' + token['accessToken']}
    # verify=False skips TLS certificate validation, exactly as before.
    # NOTE(review): confirm the API still needs this; it disables a safety check.
    r = requests.get(url,
                     headers=headers,
                     verify=False,
                     timeout=timeout)
    # Surface 401/403 and other error statuses rather than returning the
    # error payload as data.
    r.raise_for_status()
    return r.json()
def getToken(email='[email protected]', pwd='pwd', timeout=30):
    """Log in to the xg.football API and return the login response as a dict.

    Args:
        email: Account e-mail address. The default is the placeholder value
            the script shipped with; pass real credentials to log in.
        pwd: Account password. The default is a placeholder as well.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``dict`` built from the JSON login response. ``authenticatedRequest``
        reads the ``'accessToken'`` key from it.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the login is rejected (4xx/5xx).
    """
    payload = {'email': email, 'password': pwd}
    # verify=False skips TLS certificate validation, exactly as before.
    # NOTE(review): confirm the API still needs this; it disables a safety check.
    r = requests.post("https://api.xg.football/user/login",
                      verify=False,
                      json=payload,
                      timeout=timeout)
    # A rejected login should stop here, not come back as a token-less dict.
    r.raise_for_status()
    return dict(r.json())
##Season ids of the two latest EPL seasons
seasons = [89, 134]

##Gather every match of every season into one flat list
matches = []
for season in seasons:
    url = "https://api.xg.football/season/matches/{}".format(season)
    data = plainRequest(url)
    matches.extend(data)

##Keep only the ids of matches whose start time is already in the past
filteredMatchIds = [
    match['matchId']
    for match in matches
    if time.time() > int(match['startTime'])
]

##Download the team stats of each played match, one request at a time
teamStats = []
for matchId in filteredMatchIds:
    matchurl = 'https://api.xg.football/match/teamstats/{}'.format(matchId)
    data = plainRequest(matchurl)
    ##Pause five seconds between requests (presumably to go easy on the API)
    time.sleep(5)
    teamStats.extend(data)
    print("Got data for " + str(matchId))

##Persist everything as a CSV for the next step
df = pd.DataFrame(teamStats)
df.to_csv("teamStats.csv")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import pandas as pd | |
import time | |
import urllib3 | |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | |
##Read into a Pandas dataframe from csv
df = pd.read_csv("teamStats.csv")

##This is for later: we are predicting the result, so we need to know what
##the actual result was for each match
extract = ['team', 'startTime', 'matchId', 'goalsFor', 'goalsAgainst']
goals = df[extract]

##We first set the index to the time the match started so that
##we know when the match we are predicting for occurred, and sort by it
##so that every rolling window covers a team's matches in chronological order
df.set_index('startTime', inplace=True)
df.sort_index(inplace=True, kind="mergesort")

##We then create a rolling average of each stat we are pulling.
##We need to shift the averages by one match because we can't use data
##from Match X to predict Match X. The shift is done per team: shifting the
##whole concatenated series would hand the last average of one team to the
##first match of the next team.
features = ['shotsTotal', 'totalPasses']
window = 5
featuresData = []
for feature in features:
    rolled = df.groupby("team")[feature].rolling(window).mean()
    featuresData.append(rolled.groupby(level="team").shift(1))
featuresDf = pd.concat(featuresData, axis=1)

##Some matches don't have 5 before them, we need to remove those
featuresDf.dropna(inplace=True)

##We then merge in the actual results
featuresDfWithScore = featuresDf.merge(goals, on=['team', 'startTime'])
featuresDfWithScore.to_csv("prediction.csv")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import pandas as pd | |
import time | |
import statsmodels.api as sm | |
import numpy as np | |
import urllib3 | |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | |
def isWin(xScore, yScore):
    """Return True when ``xScore`` is strictly greater than ``yScore``."""
    xIsAhead = xScore > yScore
    return xIsAhead
def isDraw(xScore, yScore):
    """Return True when ``xScore`` and ``yScore`` are equal."""
    scoresLevel = xScore == yScore
    return scoresLevel
def getProbabilities(xPredicted, yPredicted, trialNum=10000):
    """Estimate win/draw/loss probabilities by simulating Poisson scores.

    Each side's goal count is drawn ``trialNum`` times from a Poisson
    distribution whose mean is that side's predicted goals; the outcome
    frequencies over those draws are returned.

    Args:
        xPredicted: Expected goals for side x (Poisson mean, must be >= 0).
        yPredicted: Expected goals for side y (Poisson mean, must be >= 0).
        trialNum: Number of simulated matches.

    Returns:
        A tuple ``(xWin, draw, yWin)`` of fractions that sum to 1.

    Raises:
        ValueError: raised by NumPy if a mean is negative.
    """
    # Use the arguments themselves; the previous version ignored them and
    # read the predictions from a module-level loop variable instead.
    xPoisson = np.random.poisson(xPredicted, trialNum)
    yPoisson = np.random.poisson(yPredicted, trialNum)

    # Count outcomes with array comparisons instead of a Python-level loop.
    xWinCount = int(np.count_nonzero(xPoisson > yPoisson))
    drawCount = int(np.count_nonzero(xPoisson == yPoisson))
    # Whatever is neither an x win nor a draw is a y win.
    yWinCount = trialNum - xWinCount - drawCount

    return xWinCount / trialNum, drawCount / trialNum, yWinCount / trialNum
##Load the engineered features; the first CSV column is used as the index
df = pd.read_csv("prediction.csv", index_col=0)

##Take the target (goals scored) out of the frame
end = np.asarray(df.pop("goalsFor"))

##Select the predictors and prepend a constant column
features = ['totalPasses', 'shotsTotal']
designMatrix = sm.add_constant(df[features])
exog = np.asarray(designMatrix)

##Fit a Poisson GLM of goals on the predictors
poissonModel = sm.GLM(end, exog, family=sm.families.Poisson())
poissonRes = poissonModel.fit()

##Store the fitted values next to the inputs
predictedGoals = poissonRes.predict()
df['predictedGoals'] = predictedGoals

##At this point there is one row per team, not one row per match.
##Joining the table with itself on matchId pairs every team with every
##team of the same match; we then keep the first pairing of two different
##teams for each match and drop everything else.
selfMerge = df.merge(df, on="matchId")
dupChecker = set()
rows = []
##NOTE: the row variable stays named `k`; code elsewhere in this file has
##referred to a module-level `k`, so do not rename it casually.
for i, k in selfMerge.iterrows():
    if k['team_x'] == k['team_y'] or k['matchId'] in dupChecker:
        continue
    dupChecker.add(k['matchId'])
    rows.append(dict(k))

##Turn the predicted goals into outcome probabilities: treat each side's
##goals as a Poisson variable, simulate the match 10k times, and read the
##probabilities off the simulated results
matchesWithPredictedGoals = pd.DataFrame(rows)
test = []
for i, k in matchesWithPredictedGoals.iterrows():
    t1Win, draw, t2Win = getProbabilities(k['predictedGoals_x'],
                                          k['predictedGoals_y'])
    temp = dict(k)
    temp.update(t1Win=t1Win, draw=draw, t2Win=t2Win)
    test.append(temp)

final = pd.DataFrame(test)
final.to_csv("probability.csv")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import pandas as pd | |
def plainRequest(url, timeout=30):
    """Fetch ``url`` with an unauthenticated GET and return the decoded JSON.

    Args:
        url: Full URL of the endpoint to query.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the response status is 4xx/5xx.
    """
    # verify=False skips TLS certificate validation, exactly as before.
    # NOTE(review): confirm the API still needs this; it disables a safety check.
    r = requests.get(url, verify=False, timeout=timeout)
    # Fail loudly on error statuses instead of passing an error payload on
    # to callers as if it were real data.
    r.raise_for_status()
    return r.json()
def authenticatedRequest(url, token, timeout=30):
    """Fetch ``url`` with a bearer token and return the decoded JSON.

    Args:
        url: Full URL of the endpoint to query.
        token: Mapping holding the access token under the ``'accessToken'``
            key (the value returned by ``getToken``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The parsed JSON body of the response.

    Raises:
        KeyError: if ``token`` has no ``'accessToken'`` entry.
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the response status is 4xx/5xx.
    """
    headers = {'Authorization': 'Bearer ' + token['accessToken']}
    # verify=False skips TLS certificate validation, exactly as before.
    # NOTE(review): confirm the API still needs this; it disables a safety check.
    r = requests.get(url,
                     headers=headers,
                     verify=False,
                     timeout=timeout)
    # Surface 401/403 and other error statuses rather than returning the
    # error payload as data.
    r.raise_for_status()
    return r.json()
def getToken(email='[email protected]', pwd='pwd', timeout=30):
    """Log in to the xg.football API and return the login response as a dict.

    Args:
        email: Account e-mail address. The default is the placeholder value
            the script shipped with; pass real credentials to log in.
        pwd: Account password. The default is a placeholder as well.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``dict`` built from the JSON login response. ``authenticatedRequest``
        reads the ``'accessToken'`` key from it.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the login is rejected (4xx/5xx).
    """
    payload = {'email': email, 'password': pwd}
    # A stray pasted URL used to precede this statement and made the line a
    # syntax error; it has been removed.
    # verify=False skips TLS certificate validation, exactly as before.
    # NOTE(review): confirm the API still needs this; it disables a safety check.
    r = requests.post("https://api.xg.football/user/login",
                      verify=False,
                      json=payload,
                      timeout=timeout)
    # A rejected login should stop here, not come back as a token-less dict.
    r.raise_for_status()
    return dict(r.json())
##Download the averages of one player (id 21330) and store them as a CSV
playerId = 21330
url = "https://api.xg.football/player/averages/{}".format(playerId)
data = plainRequest(url)
df = pd.DataFrame(data)
df.to_csv("data.csv")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
def plainRequest(url, timeout=30):
    """Fetch ``url`` with an unauthenticated GET and return the decoded JSON.

    Args:
        url: Full URL of the endpoint to query.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the response status is 4xx/5xx.
    """
    # verify=False skips TLS certificate validation, exactly as before.
    # NOTE(review): confirm the API still needs this; it disables a safety check.
    r = requests.get(url, verify=False, timeout=timeout)
    # Fail loudly on error statuses instead of passing an error payload on
    # to callers as if it were real data.
    r.raise_for_status()
    return r.json()
def authenticatedRequest(url, token, timeout=30):
    """Fetch ``url`` with a bearer token and return the decoded JSON.

    Args:
        url: Full URL of the endpoint to query.
        token: Mapping holding the access token under the ``'accessToken'``
            key (the value returned by ``getToken``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The parsed JSON body of the response.

    Raises:
        KeyError: if ``token`` has no ``'accessToken'`` entry.
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the response status is 4xx/5xx.
    """
    headers = {'Authorization': 'Bearer ' + token['accessToken']}
    # verify=False skips TLS certificate validation, exactly as before.
    # NOTE(review): confirm the API still needs this; it disables a safety check.
    r = requests.get(url,
                     headers=headers,
                     verify=False,
                     timeout=timeout)
    # Surface 401/403 and other error statuses rather than returning the
    # error payload as data.
    r.raise_for_status()
    return r.json()
def getToken(email='[email protected]', pwd='pwd', timeout=30):
    """Log in to the xg.football API and return the login response as a dict.

    Args:
        email: Account e-mail address. The default is the placeholder value
            the script shipped with; pass real credentials to log in.
        pwd: Account password. The default is a placeholder as well.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``dict`` built from the JSON login response. ``authenticatedRequest``
        reads the ``'accessToken'`` key from it.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.HTTPError: if the login is rejected (4xx/5xx).
    """
    payload = {'email': email, 'password': pwd}
    # verify=False skips TLS certificate validation, exactly as before.
    # NOTE(review): confirm the API still needs this; it disables a safety check.
    r = requests.post("https://api.xg.football/user/login",
                      verify=False,
                      json=payload,
                      timeout=timeout)
    # A rejected login should stop here, not come back as a token-less dict.
    r.raise_for_status()
    return dict(r.json())
##Query the same endpoint twice: first anonymously, then with a bearer token
url = "https://api.xg.football/player/averages/21330"

publicResult = plainRequest(url)
print(publicResult)

token = getToken()
authedResult = authenticatedRequest(url, token)
print(authedResult)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment