breeko · January 28, 2018 12:59
diff --git a/mini_max_time_avg_tic_tac_toe.py b/mini_max_time_avg_tic_tac_toe.py
 class MiniMaxTimeAverageBot:
    def __init__(self, time_penalty = -0.01, sub_optimal_weight=0.1):
        """ Returns best move given game and player """
        self.time_penalty = time_penalty
        self.memo = {}
        self.sub_optimal_weight = sub_optimal_weight
        
    def play(self, game, player):
        return self._mini_max(game, player)[0]

    def _mini_max(self, game, player, num_moves=0):
        """ Helper function for get_best_move. Returns best move and score given game and player """
        if player not in self.memo:
            self.memo[player] = {}
            
        player_memo = self.memo[player]
        
        if game not in player_memo:
            if game.gameover():
                best_move = None
                best_score = game.score_game(player)
            else:
                alt_player = [alt_player for alt_player in game.legal_players if alt_player != player][0]
                moves = game.get_moves()
                best_score = float("-inf")
                sub_optimal_sum = 0
                for move in moves:
                    clone = game.copy()
                    clone.move(player, move)
                    _, score = self._mini_max(clone, player=alt_player, num_moves=num_moves+1)
                    score *= -1
                    score += self.time_penalty * num_moves
                    sub_optimal_sum += score

                    if score > best_score:
                        best_move = move 
                        best_score = score
                sub_optimal_sum -= best_score # Remove the best score from the sub-optimal running sum
                sub_optimal = sub_optimal_sum / len(moves) # Average the sub-optimal returns
                # Take a weighted average of the perfect and sub-optimal returns based on sub-optimal weight provided
                best_score = ((1 - self.sub_optimal_weight) * best_score) + (self.sub_optimal_weight * sub_optimal)
            self.memo[player][game] = (best_move, best_score)
        return self.memo[player][game]
	class MiniMaxTimeAverageBot:
	def __init__(self, time_penalty = -0.01, sub_optimal_weight=0.1):
	""" Returns best move given game and player """
	self.time_penalty = time_penalty
	self.memo = {}
	self.sub_optimal_weight = sub_optimal_weight

	def play(self, game, player):
	return self._mini_max(game, player)[0]

	def _mini_max(self, game, player, num_moves=0):
	""" Helper function for get_best_move. Returns best move and score given game and player """
	if player not in self.memo:
	self.memo[player] = {}

	player_memo = self.memo[player]

	if game not in player_memo:
	if game.gameover():
	best_move = None
	best_score = game.score_game(player)
	else:
	alt_player = [alt_player for alt_player in game.legal_players if alt_player != player][0]
	moves = game.get_moves()
	best_score = float("-inf")
	sub_optimal_sum = 0
	for move in moves:
	clone = game.copy()
	clone.move(player, move)
	_, score = self._mini_max(clone, player=alt_player, num_moves=num_moves+1)
	score *= -1
	score += self.time_penalty * num_moves
	sub_optimal_sum += score

	if score > best_score:
	best_move = move
	best_score = score
	sub_optimal_sum -= best_score # Remove the best score from the sub-optimal running sum
	sub_optimal = sub_optimal_sum / len(moves) # Average the sub-optimal returns
	# Take a weighted average of the perfect and sub-optimal returns based on sub-optimal weight provided
	best_score = ((1 - self.sub_optimal_weight) * best_score) + (self.sub_optimal_weight * sub_optimal)
	self.memo[player][game] = (best_move, best_score)
	return self.memo[player][game]