Skip to content

Instantly share code, notes, and snippets.

@breeko
Last active January 28, 2018 12:59
Show Gist options
  • Save breeko/06c5de38713698a27c8a5c323f71266d to your computer and use it in GitHub Desktop.
Save breeko/06c5de38713698a27c8a5c323f71266d to your computer and use it in GitHub Desktop.
Mini-max with time penalty and sub-optimal weighting for tic-tac-toe
class MiniMaxTimeAverageBot:
def __init__(self, time_penalty = -0.01, sub_optimal_weight=0.1):
""" Returns best move given game and player """
self.time_penalty = time_penalty
self.memo = {}
self.sub_optimal_weight = sub_optimal_weight
def play(self, game, player):
return self._mini_max(game, player)[0]
def _mini_max(self, game, player, num_moves=0):
""" Helper function for get_best_move. Returns best move and score given game and player """
if player not in self.memo:
self.memo[player] = {}
player_memo = self.memo[player]
if game not in player_memo:
if game.gameover():
best_move = None
best_score = game.score_game(player)
else:
alt_player = [alt_player for alt_player in game.legal_players if alt_player != player][0]
moves = game.get_moves()
best_score = float("-inf")
sub_optimal_sum = 0
for move in moves:
clone = game.copy()
clone.move(player, move)
_, score = self._mini_max(clone, player=alt_player, num_moves=num_moves+1)
score *= -1
score += self.time_penalty * num_moves
sub_optimal_sum += score
if score > best_score:
best_move = move
best_score = score
sub_optimal_sum -= best_score # Remove the best score from the sub-optimal running sum
sub_optimal = sub_optimal_sum / len(moves) # Average the sub-optimal returns
# Take a weighted average of the perfect and sub-optimal returns based on sub-optimal weight provided
best_score = ((1 - self.sub_optimal_weight) * best_score) + (self.sub_optimal_weight * sub_optimal)
self.memo[player][game] = (best_move, best_score)
return self.memo[player][game]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment