@breeko
breeko / mini_max_tic_tac_toe.py
Last active January 28, 2018 02:09
Mini-max for tic-tac-toe
class MiniMaxBot:
    def __init__(self):
        """ Bot whose play() returns the best move for a given game and player """
        self.memo = {}

    def play(self, game, player):
        return self._mini_max(game, player)[0]

    def _mini_max(self, game, player):
        """ Helper function for play. Returns best move and score given game and player """
@breeko
breeko / play_tic_tac_toe.py
Last active January 28, 2018 02:08
Tic-tac-toe interface used with mini-max
def play(game, bot):
    human = None
    while human is None:
        human = input("Select player: {} ".format(game.legal_players)).upper()
        if human not in game.legal_players:
            print("Invalid option")
            human = None
    comp = [alt_player for alt_player in game.legal_players if alt_player != human][0]
    turn = game.legal_players[0]
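A rough sketch of how the rest of this loop plausibly runs, assuming the game object exposes over(), render(), legal_moves(), move(), and winner() (assumed names, not confirmed by the gist):

    # continues inside play(game, bot); all method names below are assumptions
    while not game.over():
        game.render()
        if turn == human:
            move = input("Your move {}: ".format(game.legal_moves()))
        else:
            move = bot.play(game, turn)
        game = game.move(move, turn)
        turn = comp if turn == human else human   # alternate turns
    game.render()
    print("Winner: {}".format(game.winner()))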
@breeko
breeko / mini_max_time_tic_tac_toe.py
Last active January 28, 2018 02:15
Mini-max with time penalty for tic-tac-toe
class MiniMaxTimeBot:
    def __init__(self, time_penalty=-0.01):
        """ Bot whose play() returns the best move for a given game and player,
            applying a small penalty for each extra move a line of play takes """
        self.time_penalty = time_penalty
        self.memo = {}

    def play(self, game, player):
        return self._mini_max(game, player)[0]

    def _mini_max(self, game, player, num_moves=0):
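A small sketch of one way the time penalty might enter the score, assuming terminal positions are scored +1/-1/0 as in plain minimax and num_moves counts the plies already played (an assumption about the gist's scoring, not its verbatim code):

def timed_terminal_score(winner, player, num_moves, time_penalty=-0.01):
    # Base minimax score for the terminal position
    base = 0 if winner == "DRAW" else (1 if winner == player else -1)
    # Add time_penalty per move played, so a win in 3 plies scores higher
    # than the same win reached in 5 plies
    return base + num_moves * time_penalty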
@breeko
breeko / mini_max_time_avg_tic_tac_toe.py
Last active January 28, 2018 12:59
Mini-max with time penalty and sub-optimal weighting for tic-tac-toe
class MiniMaxTimeAverageBot:
    def __init__(self, time_penalty=-0.01, sub_optimal_weight=0.1):
        """ Bot whose play() returns the best move for a given game and player,
            with a time penalty and a weighting toward each branch's average
            (sub-optimal) outcome rather than only its best-play outcome """
        self.time_penalty = time_penalty
        self.memo = {}
        self.sub_optimal_weight = sub_optimal_weight

    def play(self, game, player):
        return self._mini_max(game, player)[0]
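One plausible reading of the sub-optimal weighting, sketched as a blend of a branch's best child score with its average child score (this helper and its exact formula are assumptions, not taken from the gist):

def blended_score(child_scores, sub_optimal_weight=0.1):
    best = max(child_scores)                     # classic minimax component
    avg = sum(child_scores) / len(child_scores)  # how the branch fares against imperfect play
    # Mostly trust the minimax value, but give some credit to branches
    # where an opponent's mistakes would pay off
    return (1 - sub_optimal_weight) * best + sub_optimal_weight * avg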
@breeko
breeko / mini_max_time_avg_limit.py
Created January 28, 2018 04:51
Mini-max with time penalty, sub-optimal weighting and a limit on recursion depth
class MiniMaxTimeAverageLimitBot:
    def __init__(self, time_penalty=-0.01, sub_optimal_weight=0.1, limit=float("inf")):
        """ Bot whose play() returns the best move for a given game and player,
            with a time penalty, sub-optimal weighting and a cap on recursion depth """
        self.time_penalty = time_penalty
        self.memo = {}
        self.sub_optimal_weight = sub_optimal_weight
        self.limit = limit

    def play(self, game, player):
        return self._mini_max(game, player)[0]
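Relative to the earlier sketches, the only new piece is a depth cutoff. A hedged sketch of how the recursion might bail out once the limit is reached (assumed game API and neutral-score fallback, not the gist's verbatim code):

def mini_max_limited(game, player, depth=0, limit=float("inf")):
    if depth >= limit:
        # Search budget spent: treat the unresolved position as neutral
        return None, 0
    winner = game.winner()
    if winner is not None:
        return None, 0 if winner == "DRAW" else (1 if winner == player else -1)
    opponent = [p for p in game.legal_players if p != player][0]
    # Negate the opponent's score for each move and keep the best one
    scores = {m: -mini_max_limited(game.move(m, player), opponent, depth + 1, limit)[1]
              for m in game.legal_moves()}
    best_move = max(scores, key=scores.get)
    return best_move, scores[best_move]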
@breeko
breeko / connect.py
Created January 28, 2018 04:54
Connect-4-style game object
class Connect:
    def __init__(self, connect=4, width=7, height=6):
        # `connect` is the number of pieces in a row needed to win
        self.height = height
        self.width = width
        self.connect = connect
        self.legal_players = ["X", "O"]
        self.reset()

    def reset(self):
        # The board is a height x width grid of blanks that players fill in
        self.board = [[" " for _ in range(self.width)] for _ in range(self.height)]
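A hedged sketch of a drop-style move for this board layout, assuming pieces fall to the lowest empty row of the chosen column (the method name and return convention are assumptions):

    def move(self, column, player):
        # Scan the chosen column from the bottom row up and place the piece
        # in the first empty cell; return False if the column is already full
        for row in range(self.height - 1, -1, -1):
            if self.board[row][column] == " ":
                self.board[row][column] = player
                return True
        return False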
@breeko
breeko / evolutionary_models.py
Last active February 5, 2018 11:23
Simple evolutionary models that can be used with OpenAI Gym environments
import numpy as np
import gym

def create_population(env, size=1, mean=None, std=1):
    """ Creates a population of random weight matrices for the environment """
    params = env.observation_space.shape[0]  # Parameters in our environment state
    action_space = env.action_space.n        # Possible actions
    if mean is None:
        # No mean specified, assume mean of zero
        mean = np.zeros((params, action_space))
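A hedged completion of the population draw plus a scoring helper, assuming each individual is a params x action_space weight matrix and actions come from arg-maxing observation-times-weights under the classic (pre-0.26) gym step API; none of this is the gist's verbatim code:

def create_population_sketch(env, size=1, mean=None, std=1):
    params = env.observation_space.shape[0]
    action_space = env.action_space.n
    if mean is None:
        mean = np.zeros((params, action_space))
    # Each individual is a (params x action_space) weight matrix drawn around `mean`
    return [np.random.normal(loc=mean, scale=std) for _ in range(size)]

def evaluate(env, weights, max_steps=1000):
    """ Runs one episode with a linear policy and returns the total reward """
    obs = env.reset()
    total_reward = 0.0
    for _ in range(max_steps):
        action = int(np.argmax(obs @ weights))  # pick the best-scoring action
        obs, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward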
@breeko
breeko / Q_network.py
Last active February 18, 2018 12:57
Q-network used for reinforcement learning
from keras.models import Model
from keras.layers import Conv2D, Dense, Flatten, Input, Lambda
import keras.backend as K

class Qnetwork():
    def __init__(self, final_layer_size, input_shape, num_actions):
        # The input image of the game is 84 x 84 x 3 (RGB)
        self.inputs = Input(shape=[input_shape], name="main_input")
        # There will be four layers of convolutions performed on the image input
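Given the Lambda and K imports and the final_layer_size argument, a plausible continuation is a convolution stack followed by a dueling value/advantage head. This sketch assumes the input tensor has been brought into (84, 84, 3) image shape (how the gist gets there from input_shape is not shown in the preview), and its layers and hyperparameters are assumptions, not the gist's exact code:

        # Hedged sketch: four convolutions, then a dueling split recombined into Q-values
        image_in = self.inputs  # assumed to be image-shaped at this point
        conv = Conv2D(32, (8, 8), strides=(4, 4), activation="relu")(image_in)
        conv = Conv2D(64, (4, 4), strides=(2, 2), activation="relu")(conv)
        conv = Conv2D(64, (3, 3), strides=(1, 1), activation="relu")(conv)
        conv = Conv2D(final_layer_size, (7, 7), strides=(1, 1), activation="relu")(conv)
        flat = Flatten()(conv)
        value = Dense(1)(flat)                # V(s): how good the state is overall
        advantage = Dense(num_actions)(flat)  # A(s, a): how much better each action is
        # Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))
        q_out = Lambda(lambda x: x[0] + (x[1] - K.mean(x[1], axis=1, keepdims=True)))(
            [value, advantage])
        self.model = Model(inputs=self.inputs, outputs=q_out)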
@breeko
breeko / ExperienceReplay.py
Created February 18, 2018 12:52
Experience replay class for reinforcement learning
import numpy as np

class ExperienceReplay:
    def __init__(self, buffer_size=50000):
        """ Data structure used to hold game experiences """
        # Buffer will contain [state, action, reward, next_state, done]
        self.buffer = []
        self.buffer_size = buffer_size

    def add(self, experience):
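        # Hedged sketch of a plausible body for add() and a sample() method, based on
        # the buffer description above and on train_snip.py transposing the result of
        # sample(); these bodies are assumptions, not the gist's verbatim code
        self.buffer.append(experience)
        # Drop the oldest experiences once the buffer exceeds its capacity
        if len(self.buffer) > self.buffer_size:
            self.buffer = self.buffer[-self.buffer_size:]

    def sample(self, size):
        # Uniform random sample, returned as an array so callers can transpose it
        indexes = np.random.choice(len(self.buffer), size=size, replace=False)
        return np.array([self.buffer[i] for i in indexes])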
@breeko
breeko / train_snip.py
Last active February 18, 2018 20:00
A portion of a larger program that uses the experience replay object to train a Q-network
if num_episode % update_freq == 0:
    for num_epoch in range(num_epochs):
        # Train batch is [[state, action, reward, next_state, done], ...]
        train_batch = experience_replay.sample(batch_size)
        # Separate the batch into its components
        train_state, train_action, train_reward, \
            train_next_state, train_done = train_batch.T
        # Convert the action array into an array of ints so they can be used for indexing
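A hedged sketch of how such a training step typically continues from here, assuming numpy is imported as np in the larger program, that main_qn and target_qn are Qnetwork instances exposing a Keras .model, and that gamma is the discount factor (all of these names are assumptions, not taken from the gist):

        train_action = train_action.astype(int)
        train_state = np.stack(train_state)
        train_next_state = np.stack(train_next_state)
        # Standard Q-learning target: reward plus discounted best next-state value,
        # with the future term zeroed out for terminal transitions
        next_q = target_qn.model.predict(train_next_state).max(axis=1)
        target_q = train_reward.astype(float) + gamma * next_q * (1 - train_done.astype(int))
        # Overwrite only the taken actions' Q-values and fit toward the targets
        current_q = main_qn.model.predict(train_state)
        current_q[np.arange(batch_size), train_action] = target_q
        main_qn.model.train_on_batch(train_state, current_q)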