Skip to content

Instantly share code, notes, and snippets.

@MaxHalford
Last active March 1, 2019 15:36
Show Gist options
  • Save MaxHalford/142a724679226ca8504726c9be081a17 to your computer and use it in GitHub Desktop.
Save MaxHalford/142a724679226ca8504726c9be081a17 to your computer and use it in GitHub Desktop.
NCAA rating formulas
"""
Rating percentage index (RPI).
Example taken from https://www.wikiwand.com/en/Rating_percentage_index#/Basketball_formula
"""
import pandas as pd

# Example results, one row per game: (season, team 1, its score, team 2, its score).
X = pd.DataFrame(
    data=[
        (2010, 'UConn', 64, 'Kansas', 57),
        (2010, 'UConn', 82, 'Duke', 68),
        (2010, 'Wisconsin', 71, 'UConn', 72),
        (2010, 'Kansas', 69, 'UConn', 62),
        (2010, 'Duke', 81, 'Wisconsin', 70),
        (2010, 'Wisconsin', 52, 'Kansas', 62),
    ],
    columns=['Season', 'T1', 'T1_Score', 'T2', 'T2_Score'],
)

# Mirror every game so that each team appears once as T1 for each game it
# played; all the per-team lookups below can then group on T1 alone.
X = pd.concat([
    X[['Season', 'T1', 'T1_Score', 'T2', 'T2_Score']],
    X[['Season', 'T2', 'T2_Score', 'T1', 'T1_Score']].rename(columns={
        'T1': 'T2',
        'T2': 'T1',
        'T1_Score': 'T2_Score',
        'T2_Score': 'T1_Score',
    }),
])


def victory(x):
    """Return a boolean Series that is True on the rows where T1 outscored T2."""
    return x['T1_Score'] > x['T2_Score']


# Flag the winners once and reuse the flagged frame for both aggregations.
_flagged = X.assign(victory=victory)

# {'mean': {(season, team): win rate}, 'count': {(season, team): games played}}
win_rates = (
    _flagged.groupby(['Season', 'T1'])['victory']
    .agg(['mean', 'count'])
    .to_dict()
)
# {'sum': {(season, team, opponent): wins of team over opponent},
#  'count': {(season, team, opponent): games between the two}}
matchups = (
    _flagged.groupby(['Season', 'T1', 'T2'])['victory']
    .agg(['sum', 'count'])
    .to_dict()
)
# {(season, team): number of games played}
n_matches = X.groupby(['Season', 'T1']).size().to_dict()
# {(season, team): array of distinct opponents}
opponents = X.groupby(['Season', 'T1'])['T2'].unique().to_dict()
def update_mean(mean, count, removed_sum, removed_count):
    """Return the mean of a sample once some observations are taken out of it.

    Args:
        mean: Mean of the full sample.
        count: Number of observations in the full sample.
        removed_sum: Sum of the observations being removed.
        removed_count: Number of observations being removed.

    Returns:
        The mean of the remaining ``count - removed_count`` observations.
        The division is undefined when nothing remains, i.e. when
        ``count == removed_count``.
    """
    remaining_sum = mean * count - removed_sum
    remaining_count = count - removed_count
    return remaining_sum / remaining_count
def calc_wp(season, team):
    """Return the winning percentage (WP) of ``team`` during ``season``.

    The value is looked up in the module-level ``win_rates`` table, whose
    ``'mean'`` entry is keyed by ``(season, team)``.
    """
    key = (season, team)
    mean_by_team = win_rates['mean']
    return mean_by_team[key]
def calc_owp(season, team):
    """Return the opponents' winning percentage (OWP) of ``team`` in ``season``.

    For every distinct opponent, the opponent's win rate is recomputed with its
    games against ``team`` taken out (via ``update_mean``), then weighted by the
    number of games the two played. The weighted total is divided by the number
    of games ``team`` played.
    """
    scale = 1 / n_matches[(season, team)]

    weighted_total = 0
    for opponent in opponents[(season, team)]:
        pairing = (season, opponent, team)
        # Opponent's record excluding the games it played against `team`.
        rate_without_team = update_mean(
            mean=win_rates['mean'][(season, opponent)],
            count=win_rates['count'][(season, opponent)],
            removed_sum=matchups['sum'][pairing],
            removed_count=matchups['count'][pairing],
        )
        weighted_total += rate_without_team * matchups['count'][pairing]

    return scale * weighted_total
def calc_oowp(season, team, owps):
    """Return the opponents' opponents' winning percentage (OOWP) of ``team``.

    Args:
        season: Season to look at.
        team: Team whose OOWP is wanted.
        owps: Mapping from team to its OWP for that season.

    Returns:
        The OWPs of the team's opponents, each weighted by the number of games
        played against that opponent, divided by the team's number of games.
    """
    scale = 1 / n_matches[(season, team)]

    weighted_total = 0
    for opponent in opponents[(season, team)]:
        games_between = matchups['count'][(season, opponent, team)]
        weighted_total += owps[opponent] * games_between

    return scale * weighted_total
def calc_rpi(wp, owp, oowp):
    """Combine the three components into the rating percentage index.

    The result is 25% winning percentage, 50% opponents' winning percentage and
    25% opponents' opponents' winning percentage.
    """
    own_part = wp * .25
    opponents_part = owp * .5
    opponents_opponents_part = oowp * .25
    return own_part + opponents_part + opponents_opponents_part
# Seasons and teams are taken from X, the same frame the lookup tables
# (win_rates, matchups, n_matches, opponents) were built from, so every
# (season, team) key requested below is guaranteed to exist in them.
seasons = X['Season'].unique()

# Distinct teams per season, computed once and shared by the four passes.
teams_by_season = {
    season: X.loc[X['Season'] == season, 'T1'].unique()
    for season in seasons
}

# Winning percentage of every team.
wps = {
    season: {team: calc_wp(season, team) for team in teams_by_season[season]}
    for season in seasons
}

# Opponents' winning percentage of every team.
owps = {
    season: {team: calc_owp(season, team) for team in teams_by_season[season]}
    for season in seasons
}

# Opponents' opponents' winning percentage; needs the season's OWPs.
oowps = {
    season: {
        team: calc_oowp(season, team, owps[season])
        for team in teams_by_season[season]
    }
    for season in seasons
}

# Final rating of every team.
rpis = {
    season: {
        team: calc_rpi(wps[season][team], owps[season][team], oowps[season][team])
        for team in teams_by_season[season]
    }
    for season in seasons
}

# Seasons become columns and teams the index; stacking yields a Series
# indexed by (team, season).
rpis = pd.DataFrame.from_dict(rpis, orient='columns').stack()
"""
Simple rating system (SRS).
"""
from scipy import optimize
# Example results, one row per game: (season, team 1, its score, team 2, its score).
X = pd.DataFrame(
    data=[
        (2010, 'UConn', 64, 'Kansas', 57),
        (2010, 'UConn', 82, 'Duke', 68),
        (2010, 'Wisconsin', 71, 'UConn', 72),
        (2010, 'Kansas', 69, 'UConn', 62),
        (2010, 'Duke', 81, 'Wisconsin', 70),
        (2010, 'Wisconsin', 52, 'Kansas', 62),
    ],
    columns=['Season', 'T1', 'T1_Score', 'T2', 'T2_Score'],
)

srss = {}
for season in X['Season'].unique():
    # Games of the season, taken from the same frame the seasons come from.
    season_results = X.query(f'Season == {season}')

    # The games are not mirrored here, so a team may only ever appear as T2;
    # take the union of both columns so that every team gets a rating.
    teams = pd.unique(pd.concat([season_results['T1'], season_results['T2']]))

    # Game matrix: one row per game, one column per team, +1 for T1 and -1
    # for T2.
    G = pd.concat(
        [
            (
                (season_results['T1'] == team).astype(int)
                - (season_results['T2'] == team).astype(int)
            ).rename(team)
            for team in teams
        ],
        axis='columns',
    )

    # Score margin of each game from T1's point of view.
    S = season_results['T1_Score'] - season_results['T2_Score']

    # Least-squares ratings R such that G @ R approximates S. Every row of G
    # sums to zero, so only rating differences are pinned down by the data.
    R = optimize.lsq_linear(G.to_numpy(dtype=float), S.to_numpy(dtype=float)).x
    srss[season] = pd.Series(R, index=teams)

# Seasons become columns and teams the index; stacking yields a Series
# indexed by (team, season).
srss = pd.DataFrame.from_dict(srss).stack()
# Attach the SRS of both teams to each row of df, matching on (team, season).
# NOTE(review): df is not defined in this snippet; presumably it is a frame of
# matchups with 'T1', 'T2' and 'Season' columns - confirm against the caller.
t1_srs = srss.rename('T1_srs')
t2_srs = srss.rename('T2_srs')
df = df.join(t1_srs, on=['T1', 'Season']).join(t2_srs, on=['T2', 'Season'])
@MaxHalford
Copy link
Author

For the 2019 NCAA Kaggle women's competition, I found that the RPI calculated this way had a 0.91 correlation with the team seeds. Note that I didn't take home and away weighting into account.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment