davidADSP · December 1, 2019 17:36
diff --git a/pseudocode.py b/pseudocode.py
 # We expand a node using the hidden state, reward and policy prediction
 # obtained from the neural network.
 def expand_node(node: Node, to_play: Player, actions: List[Action],
                 network_output: NetworkOutput):
   """Populate `node` from a network prediction and create its children.

   Copies `to_play`, `network_output.hidden_state` and
   `network_output.reward` onto `node`, then turns the policy logits of the
   given `actions` into a normalized distribution (a softmax restricted to
   `actions`) and stores one new `Node` per action in `node.children`,
   each constructed from that action's normalized probability.

   Args:
     node: The node to expand; mutated in place.
     to_play: The player recorded on the node as `node.to_play`.
     actions: The actions to create children for. If empty, no children
       are added.
     network_output: Prediction providing `hidden_state`, `reward` and
       `policy_logits` (indexed by action).
   """
   node.to_play = to_play
   node.hidden_state = network_output.hidden_state
   node.reward = network_output.reward

   logits = {a: network_output.policy_logits[a] for a in actions}
   # Shift by the largest logit before exponentiating. The normalized result
   # is mathematically unchanged, but every exponent is now <= 0, so
   # math.exp cannot raise OverflowError on large logits.
   max_logit = max(logits.values(), default=0.0)
   policy = {a: math.exp(logit - max_logit) for a, logit in logits.items()}
   policy_sum = sum(policy.values())
   for action, p in policy.items():
     node.children[action] = Node(p / policy_sum)
	# We expand a node using the hidden state, reward and policy prediction
	# obtained from the neural network.
	def expand_node(node: Node, to_play: Player, actions: List[Action],
	                network_output: NetworkOutput):
	  """Fill in `node` from `network_output` and attach one child per action.

	  Sets `node.to_play`, `node.hidden_state` and `node.reward`, then
	  normalizes the exponentiated policy logits over `actions` and stores a
	  new `Node`, built from each action's normalized probability, in
	  `node.children`.

	  Args:
	    node: The node to expand; mutated in place.
	    to_play: The player recorded on the node as `node.to_play`.
	    actions: The actions to create children for. An empty collection
	      leaves `node.children` untouched.
	    network_output: Prediction providing `hidden_state`, `reward` and
	      `policy_logits` (indexed by action).
	  """
	  node.to_play = to_play
	  node.hidden_state = network_output.hidden_state
	  node.reward = network_output.reward

	  logits = {a: network_output.policy_logits[a] for a in actions}
	  # Subtracting the maximum logit keeps every exponent <= 0, so math.exp
	  # cannot overflow; the normalized probabilities are mathematically the
	  # same as exponentiating the raw logits.
	  max_logit = max(logits.values(), default=0.0)
	  policy = {a: math.exp(logit - max_logit) for a, logit in logits.items()}
	  policy_sum = sum(policy.values())
	  for action, p in policy.items():
	    node.children[action] = Node(p / policy_sum)
No results found