#!/usr/bin/env python
"""
A basic adaptive bot. This is part of the third worksheet.
"""
from api import State, util
import random, os
from itertools import chain

import joblib

# Path of the model we will use. If you make a model
# with a different name, point this line to its path.
DEFAULT_MODEL = os.path.abspath("models/m_rand_g50000_f1_MLPC.pkl")

class Bot:

    __max_depth = -1
    __randomize = True
    __model = None

    def __init__(self, randomize=True, model_file=DEFAULT_MODEL, depth=8):
        self.__randomize = randomize
        # Cap the alphabeta search depth; beyond it the ML heuristic scores the leaves.
        self.__max_depth = depth

        # Load the model
        self.__model = joblib.load(model_file)

    def get_move(self, state):

        if state.get_phase() == 1:
            move = self.ml_value(state)[1]
            return move
        else:
            move = self.alphabeta_value(state)[1]
            return move
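
    # Note on the strategy above: in phase 1 the stock is not yet exhausted and
    # information is imperfect, so the bot simply plays the move whose successor
    # state the trained model rates best (a one-ply lookahead). In phase 2 the
    # game is fully observable, so alphabeta search is used instead, with the
    # same model acting as the leaf heuristic once __max_depth is reached.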

    def ml_value(self, state):
        """
        Return the value of this state and the associated move
        :param state:
        :return: val, move: the value of the state, and the best move.
        """
        best_value = float('-inf') if maximizing(state) else float('inf')
        best_move = None

        moves = state.moves()

        if self.__randomize:
            random.shuffle(moves)

        for move in moves:
            next_state = state.next(move)
            value = self.ml_heuristic(next_state)

            if maximizing(state):
                if value > best_value:
                    best_value = value
                    best_move = move
            else:
                if value < best_value:
                    best_value = value
                    best_move = move

        return best_value, best_move

    def ml_heuristic(self, state):

        feature_vector = [features(state)]

        classes = list(self.__model.classes_)
        prob = self.__model.predict_proba(feature_vector)[0]

        # Weigh the win/loss outcomes (-1 and 1) by their probabilities
        res = -1.0 * prob[classes.index('lost')] + 1.0 * prob[classes.index('won')]

        return res
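
    # A quick worked example of the heuristic above (illustrative numbers, not
    # from a real model): if the classifier reports classes_ == ['lost', 'won']
    # and predict_proba returns [0.3, 0.7] for this state, the heuristic value
    # is -1.0 * 0.3 + 1.0 * 0.7 = 0.4, a mildly favourable position for
    # player 1. With only these two classes the value always falls in [-1, 1].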

    def alphabeta_value(self, state, alpha=float('-inf'), beta=float('inf'), depth=0):
        """
        Return the value of this state and the associated move
        :param State state:
        :param float alpha: The highest score that the maximizing player can guarantee given current knowledge
        :param float beta: The lowest score that the minimizing player can guarantee given current knowledge
        :param int depth: How deep we are in the tree
        :return val, move: the value of the state, and the best move.
        """
        if state.finished():
            winner, points = state.winner()
            return (points, None) if winner == 1 else (-points, None)

        if depth == self.__max_depth:
            # Evaluate the leaf with the ML heuristic; return a (value, move)
            # pair so callers can unpack it like the other return paths.
            return self.ml_heuristic(state), None

        best_value = float('-inf') if maximizing(state) else float('inf')
        best_move = None

        moves = state.moves()

        if self.__randomize:
            random.shuffle(moves)

        for move in moves:

            next_state = state.next(move)
            value, _ = self.alphabeta_value(next_state, alpha, beta, depth + 1)

            if maximizing(state):
                if value > best_value:
                    best_value = value
                    best_move = move
                    alpha = best_value
            else:
                if value < best_value:
                    best_value = value
                    best_move = move
                    beta = best_value

            # Prune the search tree
            if maximizing(state):
                if best_value > beta:
                    break
            else:
                if best_value < alpha:
                    break

        return best_value, best_move
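
# A concrete illustration of the pruning step in Bot.alphabeta_value (the numbers
# are made up for the example): suppose a maximizing node is reached with beta = 5,
# meaning the minimizing player can already force a result of at most 5 elsewhere.
# If one move at this node turns out to be worth 8, best_value (8) exceeds beta (5),
# so the minimizer would never let play reach this node; the remaining moves can be
# skipped without affecting the value computed at the root.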

def maximizing(state):
    """
    Whether we're the maximizing player (1) or the minimizing player (2).
    :param state:
    :return:
    """
    return state.whose_turn() == 1

def features(state):
    # type: (State) -> list
    """
    Extract features from this state. Remember that every feature vector returned should have the same length.
    :param state: A state to be converted to a feature vector
    :return: A list of numbers: a feature vector representing this state.
    """
    feature_set = []

    # Add player 1's points to feature set
    p1_points = state.get_points(1)

    # Add player 2's points to feature set
    p2_points = state.get_points(2)

    # Add player 1's pending points to feature set
    p1_pending_points = state.get_pending_points(1)

    # Add player 2's pending points to feature set
    p2_pending_points = state.get_pending_points(2)

    # Get trump suit
    trump_suit = state.get_trump_suit()

    # Add phase to feature set
    phase = state.get_phase()

    # Add stock size to feature set
    stock_size = state.get_stock_size()

    # Add leader to feature set
    leader = state.leader()

    # Add whose turn it is to feature set
    whose_turn = state.whose_turn()

    # Add opponent's played card to feature set
    opponents_played_card = state.get_opponents_played_card()

    ################## You do not need to do anything below this line ########################

    perspective = state.get_perspective()

    # Perform one-hot encoding on the perspective.
    # Learn more about one-hot here: https://machinelearningmastery.com/how-to-one-hot-encode-sequence-data-in-python/
    perspective = [card if card != 'U' else [1, 0, 0, 0, 0, 0] for card in perspective]
    perspective = [card if card != 'S' else [0, 1, 0, 0, 0, 0] for card in perspective]
    perspective = [card if card != 'P1H' else [0, 0, 1, 0, 0, 0] for card in perspective]
    perspective = [card if card != 'P2H' else [0, 0, 0, 1, 0, 0] for card in perspective]
    perspective = [card if card != 'P1W' else [0, 0, 0, 0, 1, 0] for card in perspective]
    perspective = [card if card != 'P2W' else [0, 0, 0, 0, 0, 1] for card in perspective]

    # Append one-hot encoded perspective to feature_set
    feature_set += list(chain(*perspective))

    # Append normalized points to feature_set
    total_points = p1_points + p2_points
    feature_set.append(p1_points / total_points if total_points > 0 else 0.)
    feature_set.append(p2_points / total_points if total_points > 0 else 0.)

    # Append normalized pending points to feature_set
    total_pending_points = p1_pending_points + p2_pending_points
    feature_set.append(p1_pending_points / total_pending_points if total_pending_points > 0 else 0.)
    feature_set.append(p2_pending_points / total_pending_points if total_pending_points > 0 else 0.)

    # Convert trump suit to id and add to feature set
    # You don't need to add anything to this part
    suits = ["C", "D", "H", "S"]
    trump_suit_onehot = [0, 0, 0, 0]
    trump_suit_onehot[suits.index(trump_suit)] = 1
    feature_set += trump_suit_onehot

    # Append one-hot encoded phase to feature set
    feature_set += [1, 0] if phase == 1 else [0, 1]

    # Append normalized stock size to feature set
    feature_set.append(stock_size / 10)

    # Append one-hot encoded leader to feature set
    feature_set += [1, 0] if leader == 1 else [0, 1]

    # Append one-hot encoded whose_turn to feature set
    feature_set += [1, 0] if whose_turn == 1 else [0, 1]

    # Append one-hot encoded opponent's card to feature set
    opponents_played_card_onehot = [0] * 21
    opponents_played_card_onehot[opponents_played_card if opponents_played_card is not None else 20] = 1
    feature_set += opponents_played_card_onehot

    # Return feature set
    return feature_set
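

# --- Usage sketch -------------------------------------------------------------
# A minimal, hedged example of running this bot against itself. It is not part
# of the worksheet. It assumes the project's api.State exposes a generate()
# constructor for a fresh game state; if your template names that constructor
# differently, adapt that one call. It also requires the DEFAULT_MODEL file to
# exist on disk so Bot() can load it. All other calls (finished, whose_turn,
# next, winner) are the same State methods already used above.
if __name__ == "__main__":
    state = State.generate()  # assumed constructor; see note above
    bot = Bot()               # loads DEFAULT_MODEL via joblib

    while not state.finished():
        move = bot.get_move(state)
        state = state.next(move)

    winner, points = state.winner()
    print("Winner: player {}, score: {}".format(winner, points))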