lukebyrne · January 18, 2023 03:01
diff --git a/true-skill.ipynb b/true-skill.ipynb
diff --git a/true-skill.py b/true-skill.py
 # Build df_truerank: one row per (game, player) holding the rating the player
 # brought into the game (mu, sigma) and the rating after it (post_mu,
 # post_sigma). Games are processed in game_id order and each player's
 # post-game rating is carried forward into their next game.

 # Fetch the data
 df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')

 # Column layout of the TrueRank results
 df_truerank_columns = ['game_id', 'player_id', 'position', 'mu', 'sigma', 'post_mu', 'post_sigma']

 # Start from an empty frame so df_truerank is still well-formed if there are no games
 df_truerank = pd.DataFrame(columns=df_truerank_columns)

 # Uncomment to work on a sample of the first 1000 rows
 #df = df.head(1000)

 # Rating used for a player who has no earlier game (8.333 is roughly 25 / 3)
 default_mu = 25
 default_sigma = 8.333

 # Most recent (post_mu, post_sigma) per player_id. Keeping this in a dict
 # avoids re-scanning every earlier result row for each player of each game.
 latest_ratings = {}

 # One DataFrame per game, concatenated once after the loop. DataFrame.append
 # no longer exists in pandas 2.0+, and appending inside the loop copied the
 # whole accumulated frame for every game.
 game_frames = []

 # Group by the game_id; groupby sorts by key by default, so games are
 # visited in ascending game_id order
 games = df.groupby('game_id')

 # Now iterate the games
 for game_id, game in games:
     player_ids = [int(player_id) for player_id in game['player_id']]
     positions = list(game['position'])
     game_ids = [game_id] * len(player_ids)

     # Rating each player brings into this game: the one from their previous
     # game if they have one, otherwise the defaults
     pre_ratings = [
         latest_ratings.get(player_id, (default_mu, default_sigma))
         for player_id in player_ids
     ]
     mus = [mu for mu, _ in pre_ratings]
     sigmas = [sigma for _, sigma in pre_ratings]

     # rate() takes a list of rating groups, so wrap every player in a 1-tuple
     trueskills_tuples = [(Rating(mu=mu, sigma=sigma),) for mu, sigma in pre_ratings]

     # Positions are 1-based in the data; the ranks passed to rate() are 0-based
     ranks = [position - 1 for position in positions]

     # Get the results from the TrueSkill rate method
     results = rate(trueskills_tuples, ranks=ranks)

     # New mu and sigma for each player, rounded to 2 decimals
     post_mus = [round(result[0].mu, 2) for result in results]
     post_sigmas = [round(result[0].sigma, 2) for result in results]

     # Carry the (rounded) post-game ratings forward to each player's next game
     for player_id, post_mu, post_sigma in zip(player_ids, post_mus, post_sigmas):
         latest_ratings[player_id] = (post_mu, post_sigma)

     # Zip the per-player lists into rows and keep this game's frame
     data = list(zip(game_ids, player_ids, positions, mus, sigmas, post_mus, post_sigmas))
     game_frames.append(pd.DataFrame(data, columns=df_truerank_columns))

 # pd.concat raises on an empty list, so only replace the empty frame when
 # at least one game was rated. Index labels restart at 0 for every game,
 # exactly as they did with the per-game append.
 if game_frames:
     df_truerank = pd.concat(game_frames)
	# Build df_truerank: one row per (game, player) holding the rating the player
	# brought into the game (mu, sigma) and the rating after it (post_mu,
	# post_sigma). Games are processed in game_id order and each player's
	# post-game rating is carried forward into their next game.

	# Fetch the data
	df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')

	# Column layout of the TrueRank results
	df_truerank_columns = ['game_id', 'player_id', 'position', 'mu', 'sigma', 'post_mu', 'post_sigma']

	# Start from an empty frame so df_truerank is still well-formed if there are no games
	df_truerank = pd.DataFrame(columns=df_truerank_columns)

	# Uncomment to work on a sample of the first 1000 rows
	#df = df.head(1000)

	# Rating used for a player who has no earlier game (8.333 is roughly 25 / 3)
	default_mu = 25
	default_sigma = 8.333

	# Most recent (post_mu, post_sigma) per player_id. Keeping this in a dict
	# avoids re-scanning every earlier result row for each player of each game.
	latest_ratings = {}

	# One DataFrame per game, concatenated once after the loop. DataFrame.append
	# no longer exists in pandas 2.0+, and appending inside the loop copied the
	# whole accumulated frame for every game.
	game_frames = []

	# Group by the game_id; groupby sorts by key by default, so games are
	# visited in ascending game_id order
	games = df.groupby('game_id')

	# Now iterate the games
	for game_id, game in games:
		player_ids = [int(player_id) for player_id in game['player_id']]
		positions = list(game['position'])
		game_ids = [game_id] * len(player_ids)

		# Rating each player brings into this game: the one from their previous
		# game if they have one, otherwise the defaults
		pre_ratings = [
			latest_ratings.get(player_id, (default_mu, default_sigma))
			for player_id in player_ids
		]
		mus = [mu for mu, _ in pre_ratings]
		sigmas = [sigma for _, sigma in pre_ratings]

		# rate() takes a list of rating groups, so wrap every player in a 1-tuple
		trueskills_tuples = [(Rating(mu=mu, sigma=sigma),) for mu, sigma in pre_ratings]

		# Positions are 1-based in the data; the ranks passed to rate() are 0-based
		ranks = [position - 1 for position in positions]

		# Get the results from the TrueSkill rate method
		results = rate(trueskills_tuples, ranks=ranks)

		# New mu and sigma for each player, rounded to 2 decimals
		post_mus = [round(result[0].mu, 2) for result in results]
		post_sigmas = [round(result[0].sigma, 2) for result in results]

		# Carry the (rounded) post-game ratings forward to each player's next game
		for player_id, post_mu, post_sigma in zip(player_ids, post_mus, post_sigmas):
			latest_ratings[player_id] = (post_mu, post_sigma)

		# Zip the per-player lists into rows and keep this game's frame
		data = list(zip(game_ids, player_ids, positions, mus, sigmas, post_mus, post_sigmas))
		game_frames.append(pd.DataFrame(data, columns=df_truerank_columns))

	# pd.concat raises on an empty list, so only replace the empty frame when
	# at least one game was rated. Index labels restart at 0 for every game,
	# exactly as they did with the per-game append.
	if game_frames:
		df_truerank = pd.concat(game_frames)