Last active
February 25, 2022 11:16
-
-
Save thunderInfy/5d4c711b4dc5f12cc598a9019c47edec to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The backward pass of MCTS results is handled through recursion.
def selection(self, node, root=False, logging=False, actions=None):
    """Descend one step from ``node`` and back the resulting value up.

    The best child of ``node`` is chosen via ``self.select_best_child``.
    If that child has no state yet (``best_child.state is None``) it is
    handed to ``self.expand_and_evaluate``; otherwise this method recurses
    into the child. On the way back, the obtained value is added to
    ``node.W`` and ``node.N`` is incremented.

    Args:
        node: Tree node to select from; must expose ``W`` and ``N``.
        root: Forwarded to ``select_best_child``; recursive calls always
            pass ``False``.
        logging: When true, the chosen actions along the path are recorded
            and appended as one line to ``data.txt`` once the descent
            stops. Used for visualization only.
        actions: Accumulator for the logged path. A fresh list is created
            when it is ``None`` and logging is on; a list passed in by the
            caller is mutated in place.

    Returns:
        The negated value, because parent and child represent opposing
        players: a value of +1 for one side is -1 for the other.
    """
    # Child with the highest PUCT value and the action that leads to it.
    best_child, best_action = self.select_best_child(node, root)

    # Path logging is not part of the search itself.
    if logging:
        if actions is None:
            actions = []
        # Flatten the two-component action into a single index.
        # NOTE(review): `args` is a module-level name not visible in this
        # block; presumably `args.M` is the board width - confirm.
        actions.append(best_action[0] * args.M + best_action[1])

    if best_child.state is None:
        # Either a terminal node or a node that still has to be expanded.
        val = self.expand_and_evaluate(node, best_action, best_child)

        if logging:
            # One comma-separated line per completed descent.
            with open('data.txt', 'a', encoding='utf-8') as fout:
                fout.write(', '.join(map(str, actions)) + '\n')
    else:
        # Keep descending; the recursion performs the backward pass.
        val = self.selection(best_child, False, logging, actions)

    node.W += val
    node.N += 1

    # The caller sits one level up, i.e. it is the opposite player, so the
    # value changes sign on its way back.
    return -val
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment