Skip to content

Instantly share code, notes, and snippets.

@1328
Created June 23, 2015 19:03
Show Gist options
  • Select an option

  • Save 1328/8a1ad6e042935d173cbc to your computer and use it in GitHub Desktop.

Select an option

Save 1328/8a1ad6e042935d173cbc to your computer and use it in GitHub Desktop.
Tic_tac_toe
import random
import collections
import logging
from pprint import pprint
SAVEFILE = 't1'
class Board(object):
template = '''
\t{}|{}|{}
\t------
\t{}|{}|{}
\t------
\t{}|{}|{}
'''
combos = [
[0,1,2],
[3,4,5],
[6,7,8],
[0,3,6],
[1,4,7],
[2,5,8],
[0,4,8],
[2,4,6],
]
def __init__(self, tiles = None):
'''
create a new board object
optionally if specified from a list of tile values
'''
if tiles is not None:
self.tiles = tiles
else:
self.tiles = [str(i) for i in range(1,10)]
def __str__(self):
'''
using template format a nice board
'''
return self.template.format(*self.tiles)
def __repr__(self):
'''
added so that we can have a more clear debugging
'''
return 'Board({})'.format(','.join(i for i in self.tiles))
def __hash__(self):
'''
so that we can use a Board object as a dict key
'''
return hash(tuple(self.tiles))
def __eq__(self, other):
'''
so that we can use a Board object as a dict key
'''
return all(a==b for a,b in zip(self.tiles, other.tiles))
@property
def open_tiles(self):
'''
returns open tiles
'''
return [i for i,t in enumerate(self.tiles) if t not in ('X','O')]
def open(self, tile):
'''
return true/false if specific tile is open
'''
return self.tiles[tile] not in ('X','O')
def done(self):
'''
returns true false if game over, eithe because a winner or because all
tiles are taken
'''
if self.winner():
return True
return len(self.open_tiles) == 0
def copy(self):
'''
copy the board object
'''
other = Board()
other.tiles = [t for t in self.tiles]
return other
def winner(self):
'''
tests all the patterns in combos to see if they all have the same value
'''
for pattern in self.combos:
test = [self.tiles[i] for i in pattern]
if len(set(test)) == 1:
return test[0]
class ToeMaster(object):
'''
simple AI that learns from its past mistakes to calculate best possible
moves
stores moves in a dictionary form:
self.moves[board_state][move_choice] = value of move
you can then lookup a potential strategy passing .pick() a board state, this
looks up the state in self.moves and returns the highest valued response
'''
def __init__(self, load_file = None):
self.moves = {}
if load_file is not None:
self.load(load_file)
def load(self, load_file):
'''
loads saved outcome data, format:
board state # choice # outcome value
'''
with open(load_file, mode = 'r') as fh:
for line in fh:
line = line.strip()
state, choice, outcome = line.split('#')
choice = int(choice)
outcome = float(outcome)
state = Board(state.split(','))
if state not in self.moves:
self.moves[state] = {}
self.moves[state][choice] = outcome
def iter(self):
for state, choices in self.moves.items():
for choice, outcome in choices.items():
yield state, choice, outcome
def save(self, save_file):
'''
save the moves dictionary to a file
'''
with open(save_file, mode = 'w') as fh:
for state, choice, outcome in self.iter():
line = '{}#{}#{}\n'.format(','.join(state.tiles), choice, outcome)
fh.write(line)
def random(self, board, excludes = None):
choices = board.open_tiles
if excludes:
choices = list(set(choices) - set(excludes))
if len(choices) == 0:
choices = board.open_tiles
picked = random.choice(choices)
log.debug('Randomly chose {}, from {}, excluding {}'.format(
picked, choices, excludes))
return picked
def get_choices(self, board_state):
'''
returns a sorted list of choices based on board state
sorted by best to worst choices
'''
choices = self.moves.get(board_state, {})
choices = sorted(choices.items(), key = lambda x:-x[1])
log.debug('Options are: {}'.format(', '.join('{0}:{1:2.2f}'.format(
i[0]+1, i[1]) for i in choices)))
return choices
def pick(self, board, randomize = 0):
'''
generates a move for the computer
randomize (0-1) allows for a random move to be put in, so that the computer
does not get stuck in a rut
'''
if random.random() < randomize:
return self.random(board)
# keep a list of known bad moves
excludes = set()
for move, outcome in self.get_choices(board):
log.debug('\t Picking {} has an outcome value of {}'.format(
move + 1, outcome))
if outcome < 0:
# skip any outcome that only leads to tears
# add to the exclude list so it is not randomly chosen later
excludes.add(move)
else:
return move
log.debug('AI failed to come up with a good solution')
return self.random(board, excludes)
def learn(self, board, path):
'''
learn from a path of games
path is a list of lists [[move_choice, player, pre-move state],...]
'''
winner = board.winner()
log.debug('Winner was {}'.format(winner))
number_moves = len(path)
for idx, (move, player, state) in enumerate(path):
if state not in self.moves:
self.moves[state] = {}
if move not in self.moves[state]:
self.moves[state][move] = 0
# ok, needed some kind of weighting that put the emphasis on the end
# game, so we ramp up the probabilities as the board nears a victory
# condition
inc = 100/(number_moves-idx)
if winner == player:
inc = inc
elif winner == None:
inc = inc/2
else:
inc = -inc
log.debug('moves({})[{}] += {}'.format(
state.__repr__(), move, inc))
self.moves[state][move] += inc
def auto(ai, randomize = 0):
'''
auto plays a game, returns the board and the path that led to the board
'''
path = []
board = Board()
players = ('X','O')
tic = 0
while not board.done():
choice = ai.pick(board, randomize)
who = players[tic]
path.append([choice, who, board.copy()])
board.tiles[choice] = who
tic = int(not tic)
return board, path
def get_choice(board):
'''
helper function that gets a valid open board tile
'''
while True:
i = input('Move? ')
if i == '':
return
try:
i = int(i) - 1
if board.open(i):
return i
else:
print('That tile is already taken')
except ValueError:
print('give me a number')
except IndexError:
print('out of range!')
def manual(ai):
log.setLevel(logging.DEBUG)
board = Board()
path = []
while not board.done():
print(board)
picked = get_choice(board)
if picked is None:
picked = ai.pick(board)
path.append([picked, 'X', board.copy()])
board.tiles[picked] = 'X'
if not board.done():
computer = ai.pick(board)
path.append([computer, 'O', board.copy()])
board.tiles[computer] = 'O'
messages = {'X':'You won', 'O':'Looser!', None:'Tie!'}
print('-----')
print(messages[board.winner()])
print(board)
return board, path
def main():
ai = ToeMaster(SAVEFILE)
# 10,000 did a pretty good job of training, with a .3 randomizing factor
for i in range(50000):
board, path = auto(ai, 0)
ai.learn(board, path)
ai.save(SAVEFILE)
board, path = manual(ai)
ai.learn(board, path)
ai.save(SAVEFILE)
if __name__ == '__main__':
logging.basicConfig(format='%(levelname)s:%(message)s')
log = logging.getLogger()
#log.setLevel(logging.DEBUG)
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment