
@davidADSP
Created December 1, 2019 14:45
# Each game is produced by starting at the initial board position, then
# repeatedly executing a Monte Carlo Tree Search to generate moves until the end
# of the game is reached.
def play_game(config: MuZeroConfig, network: Network) -> Game:
  game = config.new_game()

  while not game.terminal() and len(game.history) < config.max_moves:
    # At the root of the search tree we use the representation function to
    # obtain a hidden state given the current observation.
    root = Node(0)
    current_observation = game.make_image(-1)
    expand_node(root, game.to_play(), game.legal_actions(),
                network.initial_inference(current_observation))
    add_exploration_noise(config, root)

    # We then run a Monte Carlo Tree Search using only action sequences and the
    # model learned by the network.
    run_mcts(config, root, game.action_history(), network)
    action = select_action(config, len(game.history), root, network)
    game.apply(action)
    game.store_search_statistics(root)
  return game
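The helpers `add_exploration_noise` and `select_action` are defined elsewhere in the gist. As a rough illustration of what those two steps do, the sketch below shows a standalone, simplified version: Dirichlet noise mixed into the root priors to encourage exploration, and an action sampled from root visit counts under a temperature. The function names, the `(count, action)` pair representation, and the default `alpha`/`frac` values are assumptions for this sketch, not the gist's actual implementation.

```python
import random


def add_dirichlet_noise(priors, alpha=0.3, frac=0.25):
    """Sketch: mix Dirichlet(alpha) noise into a list of root priors.

    A Dirichlet sample is drawn by normalizing independent Gamma(alpha, 1)
    draws. `frac` controls how much noise is blended in.
    """
    noise = [random.gammavariate(alpha, 1.0) for _ in priors]
    total = sum(noise)
    noise = [n / total for n in noise]
    return [p * (1 - frac) + n * frac for p, n in zip(priors, noise)]


def softmax_sample(visit_counts, temperature):
    """Sketch: pick an action from MCTS root visit counts.

    visit_counts: list of (count, action) pairs.
    temperature == 0 means greedy selection (most-visited action);
    otherwise sample in proportion to count ** (1 / temperature).
    """
    if temperature == 0:
        # Greedy: return the action with the highest visit count.
        return max(visit_counts)[1]
    weights = [count ** (1.0 / temperature) for count, _ in visit_counts]
    total = sum(weights)
    r = random.random()
    cumulative = 0.0
    for (count, action), w in zip(visit_counts, weights):
        cumulative += w / total
        if r < cumulative:
            return action
    return visit_counts[-1][1]  # guard against floating-point round-off
```

With temperature 0 the sampler is deterministic, which is typically used late in self-play games, while an early-game temperature of 1 keeps move selection stochastic.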