Skip to content

Instantly share code, notes, and snippets.

@xgFootball
Created October 17, 2018 16:10
Show Gist options
  • Save xgFootball/ffade0115709593b111d000c08a73629 to your computer and use it in GitHub Desktop.
Save xgFootball/ffade0115709593b111d000c08a73629 to your computer and use it in GitHub Desktop.
import requests
import json
import pandas as pd
import time
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def plainRequest(url, timeout=30):
    """Fetch ``url`` with an unauthenticated GET and return the decoded JSON.

    Args:
        url: Full URL of the endpoint to query.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # NOTE(review): verify=False turns off TLS certificate checking; kept
    # because the original call does so -- confirm it is still required.
    r = requests.get(url, verify=False, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    return r.json()
def authenticatedRequest(url, token, timeout=30):
    """Fetch ``url`` with a bearer token and return the decoded JSON.

    Args:
        url: Full URL of the endpoint to query.
        token: Mapping with an ``'accessToken'`` entry (string); its value is
            sent as ``Authorization: Bearer <accessToken>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        KeyError: If ``token`` has no ``'accessToken'`` entry.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    headers = {'Authorization': 'Bearer ' + token['accessToken']}
    # NOTE(review): verify=False turns off TLS certificate checking; kept
    # because the original call does so -- confirm it is still required.
    r = requests.get(url,
                     headers=headers,
                     verify=False,
                     timeout=timeout)
    # Surface rejected/expired tokens as an HTTP error rather than as JSON.
    r.raise_for_status()
    return r.json()
def getToken(email='[email protected]', pwd='pwd', timeout=30):
    """Log in to the xg.football API and return the login response as a dict.

    Args:
        email: Account e-mail sent in the login payload.
        pwd: Account password sent in the login payload.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``dict`` built from the JSON login response. ``authenticatedRequest``
        reads its ``'accessToken'`` entry.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    payload = {'email': email, 'password': pwd}
    # NOTE(review): verify=False turns off TLS certificate checking; kept
    # because the original call does so -- confirm it is still required.
    r = requests.post("https://api.xg.football/user/login",
                      verify=False,
                      json=payload,
                      timeout=timeout)
    # A failed login should stop here, not later with a missing token key.
    r.raise_for_status()
    return dict(r.json())
##IDs of the two latest EPL seasons
seasons = [89, 134]

##Collect every match of those seasons into one list
matches = []
for season in seasons:
    seasonUrl = "https://api.xg.football/season/matches/" + str(season)
    matches.extend(plainRequest(seasonUrl))

##Keep only the matchIds of matches whose start time is already in the past
filteredMatchIds = [
    m['matchId'] for m in matches if int(m['startTime']) < time.time()
]

##Download the team stats of each played match, one request at a time
teamStats = []
for matchId in filteredMatchIds:
    matchurl = 'https://api.xg.football/match/teamstats/{}'.format(matchId)
    data = plainRequest(matchurl)
    ##Pause between requests (presumably to go easy on the API)
    time.sleep(5)
    teamStats.extend(data)
    print("Got data for {}".format(matchId))

##Persist everything as a csv for the next step
df = pd.DataFrame(teamStats)
df.to_csv("teamStats.csv")
import requests
import json
import pandas as pd
import time
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
##Read the per-team match stats into a Pandas dataframe from csv
df = pd.read_csv("teamStats.csv")

##This is for later: we are predicting the result, so we need to know what
##the actual result was for each match
extract = ['team', 'startTime', 'matchId', 'goalsFor', 'goalsAgainst']
goals = df[extract]

##A rolling window only means "the previous 5 matches" when each team's rows
##are in chronological order, so sort by start time first (stable sort keeps
##the original order of rows that share a startTime)
df = df.sort_values('startTime', kind='mergesort')

##We then set the index to the time the match started so that
##we know when the match we are predicting for occurred
df.set_index('startTime', inplace=True)

##We then create a rolling average of each stat we are pulling
features = ['shotsTotal', 'totalPasses']
featuresData = []
for feature in features:
    ##5-match rolling mean per team; the result is indexed by (team, startTime)
    rolled = df.groupby("team")[feature].rolling(5).mean()
    ##We can't use data from Match X to predict Match X, so shift by one.
    ##The shift must happen inside each team: shifting the concatenated
    ##series would hand a team's first row the previous team's last average
    featuresData.append(rolled.groupby(level="team").shift(1))
featuresDf = pd.concat(featuresData, axis=1)

##Some matches don't have 5 before them, we need to remove those
featuresDf.dropna(inplace=True)

##We then merge in the actual results
featuresDfWithScore = featuresDf.merge(goals, on=['team', 'startTime'])
featuresDfWithScore.to_csv("prediction.csv")
import requests
import json
import pandas as pd
import time
import statsmodels.api as sm
import numpy as np
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def isWin(xScore, yScore):
    """Return whether ``xScore`` is strictly greater than ``yScore``."""
    return yScore < xScore
def isDraw(xScore, yScore):
    """Return whether both sides finished on the same score."""
    return yScore == xScore
def getProbabilities(xPredicted, yPredicted, trialNum=10000):
    """Estimate win/draw/loss probabilities by simulating Poisson scorelines.

    Each side's goal count is drawn ``trialNum`` times from a Poisson
    distribution whose mean is that side's predicted goals; the outcomes are
    then counted.

    Args:
        xPredicted: Predicted (mean) goals for side x; must be >= 0.
        yPredicted: Predicted (mean) goals for side y; must be >= 0.
        trialNum: Number of simulated matches.

    Returns:
        Tuple ``(xWin, draw, yWin)`` of floats that sum to 1.
    """
    # Use the arguments themselves -- not a module-level row variable -- so
    # the result depends only on what the caller passed in.
    xPoisson = np.random.poisson(xPredicted, trialNum)
    yPoisson = np.random.poisson(yPredicted, trialNum)
    # Element-wise comparison counts all simulated matches in one pass.
    xWinCount = int(np.count_nonzero(xPoisson > yPoisson))
    drawCount = int(np.count_nonzero(xPoisson == yPoisson))
    # Whatever is neither an x win nor a draw is a y win.
    yWinCount = trialNum - xWinCount - drawCount
    return xWinCount / trialNum, drawCount / trialNum, yWinCount / trialNum
##Read the features and results into a Pandas dataframe
df = pd.read_csv("prediction.csv", index_col=0)

##Remove the target that we are predicting
end = np.asarray(df.pop("goalsFor"))

##Pick out the features used for prediction and add a constant
features = ['totalPasses', 'shotsTotal']
exog = np.asarray(sm.add_constant(df[features]))

##Build the model
poissonModel = sm.GLM(end, exog, family=sm.families.Poisson())
poissonRes = poissonModel.fit()

##Get predicted values and add to our results
predictedGoals = poissonRes.predict()
df['predictedGoals'] = predictedGoals

##Right now we have just a list of teams, not a list of matches that
##we can make individual predictions for.
##There are many ways that you can do this, but what we are doing here
##is merging the table with itself and then removing the rows that pair
##a team with itself, or that belong to a match we already have
selfMerge = df.merge(df, on="matchId")
dupChecker = set()  ##matchIds already kept; a set gives O(1) lookups
rows = []
for i, k in selfMerge.iterrows():
    if k['team_x'] != k['team_y'] and k['matchId'] not in dupChecker:
        dupChecker.add(k['matchId'])
        rows.append(dict(k))

##We then build out a prediction from our predicted goals by modelling
##each side as a Poisson variable, simulating the outcome 10k times, and
##then working out the probabilities from those simulations
matchesWithPredictedGoals = pd.DataFrame(rows)
test = []
for i, k in matchesWithPredictedGoals.iterrows():
    t1Win, draw, t2Win = getProbabilities(k['predictedGoals_x'], k['predictedGoals_y'])
    temp = dict(k)
    temp['t1Win'] = t1Win
    temp['draw'] = draw
    temp['t2Win'] = t2Win
    test.append(temp)

final = pd.DataFrame(test)
final.to_csv("probability.csv")
import requests
import json
import pandas as pd
def plainRequest(url, timeout=30):
    """Issue an unauthenticated GET to ``url`` and return the parsed JSON.

    Args:
        url: Full URL of the endpoint to query.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # NOTE(review): verify=False turns off TLS certificate checking; kept
    # because the original call does so -- confirm it is still required.
    r = requests.get(url, verify=False, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    return r.json()
def authenticatedRequest(url, token, timeout=30):
    """Issue a bearer-authenticated GET to ``url`` and return the parsed JSON.

    Args:
        url: Full URL of the endpoint to query.
        token: Mapping with an ``'accessToken'`` entry (string); its value is
            sent as ``Authorization: Bearer <accessToken>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        KeyError: If ``token`` has no ``'accessToken'`` entry.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    headers = {'Authorization': 'Bearer ' + token['accessToken']}
    # NOTE(review): verify=False turns off TLS certificate checking; kept
    # because the original call does so -- confirm it is still required.
    r = requests.get(url,
                     headers=headers,
                     verify=False,
                     timeout=timeout)
    # Surface rejected/expired tokens as an HTTP error rather than as JSON.
    r.raise_for_status()
    return r.json()
def getToken(email='[email protected]', pwd='pwd', timeout=30):
    """Log in to the xg.football API and return the login response as a dict.

    Args:
        email: Account e-mail sent in the login payload.
        pwd: Account password sent in the login payload.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``dict`` built from the JSON login response. ``authenticatedRequest``
        reads its ``'accessToken'`` entry.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    payload = {'email': email, 'password': pwd}
    # The login call is a plain statement: any stray text in front of the
    # assignment makes the whole file unparseable.
    # NOTE(review): verify=False turns off TLS certificate checking; kept
    # because the original call does so -- confirm it is still required.
    r = requests.post("https://api.xg.football/user/login",
                      verify=False,
                      json=payload,
                      timeout=timeout)
    # A failed login should stop here, not later with a missing token key.
    r.raise_for_status()
    return dict(r.json())
##Download the averages for player 21330 and store them as a csv
url = "https://api.xg.football/player/averages/21330"
df = pd.DataFrame(plainRequest(url))
df.to_csv("data.csv")
import requests
import json
def plainRequest(url, timeout=30):
    """GET ``url`` without credentials and return the decoded JSON body.

    Args:
        url: Full URL of the endpoint to query.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # NOTE(review): verify=False turns off TLS certificate checking; kept
    # because the original call does so -- confirm it is still required.
    r = requests.get(url, verify=False, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    return r.json()
def authenticatedRequest(url, token, timeout=30):
    """GET ``url`` with a bearer token and return the decoded JSON body.

    Args:
        url: Full URL of the endpoint to query.
        token: Mapping with an ``'accessToken'`` entry (string); its value is
            sent as ``Authorization: Bearer <accessToken>``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        KeyError: If ``token`` has no ``'accessToken'`` entry.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    headers = {'Authorization': 'Bearer ' + token['accessToken']}
    # NOTE(review): verify=False turns off TLS certificate checking; kept
    # because the original call does so -- confirm it is still required.
    r = requests.get(url,
                     headers=headers,
                     verify=False,
                     timeout=timeout)
    # Surface rejected/expired tokens as an HTTP error rather than as JSON.
    r.raise_for_status()
    return r.json()
def getToken(email='[email protected]', pwd='pwd', timeout=30):
    """Log in to the xg.football API and return the login response as a dict.

    Args:
        email: Account e-mail sent in the login payload.
        pwd: Account password sent in the login payload.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``dict`` built from the JSON login response. ``authenticatedRequest``
        reads its ``'accessToken'`` entry.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    payload = {'email': email, 'password': pwd}
    # NOTE(review): verify=False turns off TLS certificate checking; kept
    # because the original call does so -- confirm it is still required.
    r = requests.post("https://api.xg.football/user/login",
                      verify=False,
                      json=payload,
                      timeout=timeout)
    # A failed login should stop here, not later with a missing token key.
    r.raise_for_status()
    return dict(r.json())
##Query the same endpoint twice: first anonymously, then with a login token
url = "https://api.xg.football/player/averages/21330"

anonymousResult = plainRequest(url)
print(anonymousResult)

token = getToken()
authenticatedResult = authenticatedRequest(url, token)
print(authenticatedResult)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment