sjdv1982 · July 19, 2019 21:35
diff --git a/tournament.py b/tournament.py
 """
 Recursive Bayesian approach to create optimal tournament schedules

 Solution for The Riddler Classic, July 19: https://fivethirtyeight.com/features/can-you-construct-the-optimal-tournament/

 Results:

 4 players, 4 games:
 Game 1. Player A vs player B, winner becomes the favorite.
 Game 2. Favorite vs player C.
 Game 3. Favorite vs player D.
 If the favorite wins all three games, she is declared the champion. A re-match game 4 against player B/A is unnecessary, but may improve 
 the confidence (60.8 % if the favorite wins). 
 If the favorite loses game 2 or 3,  game 4 must be a re-match of that game, and determines the champion. 
 The favorite losing exactly one of the four games is the worst-case scenario (she will be declared champion with 42.9 % confidence).
 If the favorite loses both game 2 and game 3, then game 4 is player C vs player D, which determines the champion (44.4 % confidence)

 5 players, 5 games
 The same three games as before. The favorite remains favorite unless she loses both game 2 and 3, which will make player C the new 
 favorite. Game 4 is the favorite against player E.
 If the favorite wins all four games, she is declared the champion. A re-match game 5 against player B/A is unnecessary, but may improve 
 the confidence (57.7 % if the favorite wins). 
 If she wins game 2 but loses game 3 or 4, game 5 must be a re-match of that game, and determines the champion. If she loses game 3 
 and 4,  game 5 is between player D and E, which determines the champion.
 If she loses game 2 but wins game 3, game 5 must be a re-match of game 2, and determines the champion.
 If she loses game 2 and 3, player C becomes favorite. The winner of game 4 will play for the championship against player D. 
 The worst scenario is if player E wins game 4 but player D wins game 5, becoming champion at 35.6 % confidence.

 """

 N = 5
 G = 5

 import sys
 import numpy as np
 from itertools import permutations
 from math import factorial

 n_orderings = factorial(N)
 score_matrix = np.zeros((n_orderings, N, N))
 leader_matrix = np.zeros((N, n_orderings), bool)

 steps = np.zeros(G,int)
 steps[G-1] = 3
 for n in range(G-2, -1, -1):
    steps[n] = 2 * steps[n+1] + 1

 gameplan_heap = np.zeros((2*G, steps[0], 4))

 # heap of game plans
 # each game plan consist of 2**G lines.
 # each game plan line contains 4 numbers. The first number is the match number, then the two players to match, and the chance that the first player wins
 # the rule is: if the first player wins, go to the next line, else go to line +X,
 #  where X = (2+G)**(G - iteration + 1)
 # After G iterations: 
 #  two lines say: player <number 1> won, which is correct in <number 2> % of the cases
 #  one line for if the first player won, another line for if the second player won

 for n, ordering in enumerate(permutations(range(N))):
    for nn1 in range(N):
        p1 = ordering[nn1]
        if nn1 == 0:
            leader_matrix[p1, n] = 1
        for nn2 in range(nn1+1, N):
            p2 = ordering[nn2]
            score_matrix[n, p1, p2] = 2/3
            score_matrix[n, p2, p1] = 1/3

 proba = np.zeros((G+1, n_orderings)) #probability of evidence-given-ordering for each ordering, normalized to 1
 proba[0,:] = 1/N

 totcount = 0
 for p1 in range(N):
    for p2 in range(p1+1, N):
        totcount += 1

 def choose_next_game(iteration, line, heapsize):
    best_score = None
    best_players = None
    last_proba = proba[iteration-1]
    gameplan_heap[heapsize+1:] = gameplan_heap[heapsize]    
    progress = 0
    for p1 in range(N):
        for p2 in range(p1+1, N):
                                    
            # What if p1 wins
            score_vector1 = score_matrix[:, p1, p2]
            proba1 = last_proba * score_vector1
            proba1 /= proba1.sum()
            proba[iteration] = proba1
            if iteration == G:
                winner_proba1 = leader_matrix.dot(proba1)
                # give a slight bias to the winner of the last match
                winner_proba1_biased = winner_proba1.copy()
                winner_proba1_biased[p1] += 1e-10
                #
                best_winner1 = np.argmax(winner_proba1_biased)
                best_score1 = winner_proba1[best_winner1]
            else:
                best_score1 = choose_next_game(iteration+1, line+1, heapsize+1)

            score_vector2 = score_matrix[:, p2, p1]
            proba2 = last_proba * score_vector2
            proba2 /= proba2.sum()
            proba[iteration] = proba2
            if iteration == G:
                winner_proba2 = leader_matrix.dot(proba2)                
                # give a slight bias to the winner of the last match
                winner_proba2_biased = winner_proba2.copy()
                winner_proba2_biased[p2] += 1e-10
                #
                best_winner2 = np.argmax(winner_proba2_biased)                
                best_score2 = winner_proba2[best_winner2]
            else:
                step = steps[iteration]
                best_score2 = choose_next_game(iteration+1, line+step+1, heapsize+1)

            chance1, chance2 = np.sum(last_proba * score_vector1), np.sum(last_proba * score_vector2)
            chance12 = chance1 + chance2
            chance1, chance2 = chance1/chance12, chance2/chance12
            best_score0 = chance1 * best_score1 + chance2 * best_score2
            if best_score is None or best_score0 > best_score + 1e-12:
                gameplan_heap[heapsize] = gameplan_heap[heapsize+1]
                best_score = best_score0
                best_chance1 = chance1
                best_players = p1, p2
                if iteration == G:
                    best_winners = best_winner1, best_winner2
                    best_success = best_score1, best_score2

            if iteration == 1:
                progress += 1 
                print("Progress: %d %%" % (100 * progress/totcount),file=sys.stderr)


    p1, p2 = best_players
    gameplan_heap[heapsize,line] = iteration, p1, p2, best_chance1
    if iteration == G:
        gameplan_heap[heapsize,line+1] = 99, best_winners[0], best_success[0], 99
        gameplan_heap[heapsize,line+2] = 99, best_winners[1], best_success[1], 99
    return best_score



 success_rate = choose_next_game(1, 0, 0)
 print(gameplan_heap[0])
 print("Success rate: %.3f" % (100 *success_rate))
	"""
	Recursive Bayesian approach to create optimal tournament schedules

	Solution for The Riddler Classic, July 19: https://fivethirtyeight.com/features/can-you-construct-the-optimal-tournament/

	Results:

	4 players, 4 games:
	Game 1. Player A vs player B, winner becomes the favorite.
	Game 2. Favorite vs player C.
	Game 3. Favorite vs player D.
	If the favorite wins all three games, she is declared the champion. A re-match game 4 against player B/A is unnecessary, but may improve
	the confidence (60.8 % if the favorite wins).
	If the favorite loses game 2 or 3, game 4 must be a re-match of that game, and determines the champion.
	The favorite losing exactly one of the four games is the worst-case scenario (she will be declared champion with 42.9 % confidence).
	If the favorite loses both game 2 and game 3, then game 4 is player C vs player D, which determines the champion (44.4 % confidence)

	5 players, 5 games
	The same three games as before. The favorite remains favorite unless she loses both game 2 and 3, which will make player C the new
	favorite. Game 4 is the favorite against player E.
	If the favorite wins all four games, she is declared the champion. A re-match game 5 against player B/A is unnecessary, but may improve
	the confidence (57.7 % if the favorite wins).
	If she wins game 2 but loses game 3 or 4, game 5 must be a re-match of that game, and determines the champion. If she loses game 3
	and 4, game 5 is between player D and E, which determines the champion.
	If she loses game 2 but wins game 3, game 5 must be a re-match of game 2, and determines the champion.
	If she loses game 2 and 3, player C becomes favorite. The winner of game 4 will play for the championship against player D.
	The worst scenario is if player E wins game 4 but player D wins game 5, becoming champion at 35.6 % confidence.

	"""

	N = 5
	G = 5

	import sys
	import numpy as np
	from itertools import permutations
	from math import factorial

	n_orderings = factorial(N)
	score_matrix = np.zeros((n_orderings, N, N))
	leader_matrix = np.zeros((N, n_orderings), bool)

	steps = np.zeros(G,int)
	steps[G-1] = 3
	for n in range(G-2, -1, -1):
	steps[n] = 2 * steps[n+1] + 1

	gameplan_heap = np.zeros((2*G, steps[0], 4))

	# heap of game plans
	# each game plan consist of 2**G lines.
	# each game plan line contains 4 numbers. The first number is the match number, then the two players to match, and the chance that the first player wins
	# the rule is: if the first player wins, go to the next line, else go to line +X,
	# where X = (2+G)**(G - iteration + 1)
	# After G iterations:
	# two lines say: player <number 1> won, which is correct in <number 2> % of the cases
	# one line for if the first player won, another line for if the second player won

	for n, ordering in enumerate(permutations(range(N))):
	for nn1 in range(N):
	p1 = ordering[nn1]
	if nn1 == 0:
	leader_matrix[p1, n] = 1
	for nn2 in range(nn1+1, N):
	p2 = ordering[nn2]
	score_matrix[n, p1, p2] = 2/3
	score_matrix[n, p2, p1] = 1/3

	proba = np.zeros((G+1, n_orderings)) #probability of evidence-given-ordering for each ordering, normalized to 1
	proba[0,:] = 1/N

	totcount = 0
	for p1 in range(N):
	for p2 in range(p1+1, N):
	totcount += 1

	def choose_next_game(iteration, line, heapsize):
	best_score = None
	best_players = None
	last_proba = proba[iteration-1]
	gameplan_heap[heapsize+1:] = gameplan_heap[heapsize]
	progress = 0
	for p1 in range(N):
	for p2 in range(p1+1, N):

	# What if p1 wins
	score_vector1 = score_matrix[:, p1, p2]
	proba1 = last_proba * score_vector1
	proba1 /= proba1.sum()
	proba[iteration] = proba1
	if iteration == G:
	winner_proba1 = leader_matrix.dot(proba1)
	# give a slight bias to the winner of the last match
	winner_proba1_biased = winner_proba1.copy()
	winner_proba1_biased[p1] += 1e-10
	#
	best_winner1 = np.argmax(winner_proba1_biased)
	best_score1 = winner_proba1[best_winner1]
	else:
	best_score1 = choose_next_game(iteration+1, line+1, heapsize+1)

	score_vector2 = score_matrix[:, p2, p1]
	proba2 = last_proba * score_vector2
	proba2 /= proba2.sum()
	proba[iteration] = proba2
	if iteration == G:
	winner_proba2 = leader_matrix.dot(proba2)
	# give a slight bias to the winner of the last match
	winner_proba2_biased = winner_proba2.copy()
	winner_proba2_biased[p2] += 1e-10
	#
	best_winner2 = np.argmax(winner_proba2_biased)
	best_score2 = winner_proba2[best_winner2]
	else:
	step = steps[iteration]
	best_score2 = choose_next_game(iteration+1, line+step+1, heapsize+1)

	chance1, chance2 = np.sum(last_proba * score_vector1), np.sum(last_proba * score_vector2)
	chance12 = chance1 + chance2
	chance1, chance2 = chance1/chance12, chance2/chance12
	best_score0 = chance1 * best_score1 + chance2 * best_score2
	if best_score is None or best_score0 > best_score + 1e-12:
	gameplan_heap[heapsize] = gameplan_heap[heapsize+1]
	best_score = best_score0
	best_chance1 = chance1
	best_players = p1, p2
	if iteration == G:
	best_winners = best_winner1, best_winner2
	best_success = best_score1, best_score2

	if iteration == 1:
	progress += 1
	print("Progress: %d %%" % (100 * progress/totcount),file=sys.stderr)


	p1, p2 = best_players
	gameplan_heap[heapsize,line] = iteration, p1, p2, best_chance1
	if iteration == G:
	gameplan_heap[heapsize,line+1] = 99, best_winners[0], best_success[0], 99
	gameplan_heap[heapsize,line+2] = 99, best_winners[1], best_success[1], 99
	return best_score



	success_rate = choose_next_game(1, 0, 0)
	print(gameplan_heap[0])
	print("Success rate: %.3f" % (100 *success_rate))