Created
February 25, 2022 11:25
-
-
Save thunderInfy/fa6ec4c0fe065120b65f88ec0a7add41 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def expand_and_evaluate(self, parent, action, child):
    """Score ``child`` (reached from ``parent`` via ``action``) and record the visit.

    If ``child.win`` is still ``None`` the controller is asked for the state
    that results from playing ``action`` in ``parent.state``:

    * when the controller reports no winner, the child is expanded (its state
      is stored, action validity and edges are set up) and its value comes
      from ``child.get_value()``;
    * when the controller reports a winner, that winner is cached on
      ``child.win`` so later visits skip the controller call.

    For a terminal child the value is ``-1`` when the winner equals
    ``parent.state['player_turn']`` (the parent won, so the child lost) and
    ``1`` otherwise.

    The value is added to ``child.W`` and ``child.N`` is incremented.

    Returns:
        The negated child value, i.e. the value as seen from the other player.
    """
    winner = child.win
    if winner is None:
        # Outcome not known yet: either a non-terminal node or a terminal node
        # on its first visit. The controller knows the rules of the game.
        successor, winner = self.controller.get_next_state(parent.state, action)
        board = successor.get_array_view()
        if winner is not None:
            # First time this child turns out to be terminal; remember it.
            child.win = winner

    if winner is None:
        # Non-terminal node: expand it and evaluate it.
        child.state = board
        child.set_action_validity()
        child.initialize_edges()
        score = child.get_value()
    elif parent.state['player_turn'] == winner:
        # Terminal node where the side that moved at the parent has won,
        # which means the child has lost.
        score = -1
    else:
        score = 1

    # Update the statistics of the evaluated node.
    child.W += score
    child.N += 1

    # A node's value correlates positively with the winning chances of the
    # player to move at that node. Its parent and its children belong to the
    # opposite player, for whom the same outcome has the opposite sign, so the
    # negated value is what gets passed back.
    return -score
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment