Skip to content

Instantly share code, notes, and snippets.

@denkspuren
Created May 21, 2019 18:37
Show Gist options
  • Save denkspuren/f36df1898795cebc0b3654045702972e to your computer and use it in GitHub Desktop.
Save denkspuren/f36df1898795cebc0b3654045702972e to your computer and use it in GitHub Desktop.
Die Monte Carlo Tree Search für Tic-Tac-Toe (Code zu meinem Video https://youtu.be/CjldSexfOuU)
# Code aus meinem YouTube-Video https://youtu.be/CjldSexfOuU
import random, copy
class Board:
def __init__(self):
self.board = [0,0,0,0,0,0,0,0,0]
self.moves = []
self.turn = +1
def __repr__(self):
str, repr = "", "O.X"
for i in [0,3,6]:
str += "{} {} {}\n".format(repr[self.board[i]+1],repr[self.board[i+1]+1],repr[self.board[i+2]+1])
return str[:-1]
def list_moves(self):
return [i for i in range(0,len(self.board)) if self.board[i] == 0]
def make_move(self, pos):
assert 0 <= pos <= 8 and self.board[pos] == 0
self.board[pos] = self.turn
self.moves += [pos]
self.turn = -self.turn
def undo_move(self):
assert self.moves != []
pos = self.moves.pop()
self.board[pos] = 0
self.turn = -self.turn
def three_in_a_row(self):
for row in [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,6]]:
if abs(sum([self.board[i] for i in row])) == 3: return self.board[row[0]]
return 0
def play_randomly(board):
value = board.three_in_a_row()
while value == 0:
moves = board.list_moves()
# print("moves =", moves)
if moves == []: return 0
random_move = moves[random.randrange(0,len(moves))]
# print("move is", random_move)
board.make_move(random_move)
# print(board)
value = board.three_in_a_row()
return value
def simulate_plays(board, number=100):
assert number >= 1
count = [0,0,0]
while number > 0:
b = copy.deepcopy(board)
count[play_randomly(b)+1] += 1
number -= 1
return count
def evaluate_moves(board, number=100):
assert number >= 1
moves = board.list_moves()
values = []
for move in moves:
board.make_move(move)
values += [simulate_plays(board,number)]
board.undo_move()
return values
def choose_best_move(board, number=100):
assert number >= 1 and board.list_moves() != []
moves = board.list_moves()
values = [r * board.turn for l,m,r in evaluate_moves(board,number)]
max_val = max(values)
max_idx = values.index(max_val)
return moves[max_idx]
def play(board, number=100):
assert number >= 1
value = board.three_in_a_row()
while value == 0:
if board.list_moves() == []: return value
move = choose_best_move(board, number)
board.make_move(move)
# print(str(board) + "\n---")
value = board.three_in_a_row()
return value
def tournament(rounds, number=100):
count = [0,0,0]
while rounds > 0:
res = play(Board(),number)
count[res+1] += 1
rounds -= 1
return count
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment