Last active
January 18, 2023 03:01
-
-
Save lukebyrne/97e77cf57374f49a75d2914532b2adde to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a per-game TrueSkill rating history for every player.
#
# For each game (processed in ascending game_id order) every player's rating
# going into the game and the rating produced by that game's finishing
# positions are recorded as one row of df_truerank.
#
# NOTE(review): this script expects `pd` (pandas) and `Rating` / `rate`
# (presumably from the `trueskill` package) to already be in scope -- confirm
# they are imported before this point.

# Fetch the data
df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')

# Columns of the resulting rating-history DataFrame
df_truerank_columns = ['game_id', 'player_id', 'position', 'mu', 'sigma', 'post_mu', 'post_sigma']

# Uncomment to work on a sample of the first 1000 rows
#df = df.head(1000)

# Rating assigned to a player that has not appeared in an earlier game
DEFAULT_MU = 25
DEFAULT_SIGMA = 8.333

# Most recent post-game (mu, sigma) per player_id. groupby iterates game_ids
# in ascending order, so this always holds the rating from the player's
# latest earlier game -- no need to re-filter the whole history per player.
latest_ratings = {}

# One tuple per (game, player), in df_truerank_columns order. Collected in a
# plain list and turned into a DataFrame once at the end: DataFrame.append
# was removed in pandas 2.0 and re-copied the entire frame on every call.
history_rows = []

# Group by the game_id
games = df.groupby('game_id')

# Now iterate the games
for game_id, game in games:
    player_ids = [int(player_id) for player_id in game['player_id']]
    positions = list(game['position'])

    # Rating each player carries into this game (defaults for newcomers)
    pre_ratings = [
        latest_ratings.get(player_id, (DEFAULT_MU, DEFAULT_SIGMA))
        for player_id in player_ids
    ]

    # rate() works on rating groups (teams); every player is a team of one
    rating_groups = [(Rating(mu=mu, sigma=sigma),) for mu, sigma in pre_ratings]

    # Positions are 1-based, ranks are 0-based
    ranks = [position - 1 for position in positions]

    # Get the results from the TrueSkill rate function
    results = rate(rating_groups, ranks=ranks)

    # Record the pre/post rating of every player and remember the new rating
    for player_id, position, (mu, sigma), result in zip(
        player_ids, positions, pre_ratings, results
    ):
        post_mu = round(result[0].mu, 2)
        post_sigma = round(result[0].sigma, 2)
        history_rows.append(
            (game_id, player_id, position, mu, sigma, post_mu, post_sigma)
        )
        # Updated only after rate() has run, so every row of this game was
        # rated from the pre-game values
        latest_ratings[player_id] = (post_mu, post_sigma)

# Build the rating-history DataFrame in one go (clean 0..N-1 index)
df_truerank = pd.DataFrame(history_rows, columns=df_truerank_columns)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment