import collections
from typing import Optional

# Known value bounds of the environment (e.g. (-1, 1) for two-player board
# games), as defined in the MuZero pseudocode.
KnownBounds = collections.namedtuple('KnownBounds', ['min', 'max'])


class MuZeroConfig(object):

    def __init__(self,
                 action_space_size: int,
                 max_moves: int,
                 discount: float,
                 dirichlet_alpha: float,
                 num_simulations: int,
                 batch_size: int,
                 td_steps: int,
                 num_actors: int,
                 lr_init: float,
                 lr_decay_steps: float,
                 visit_softmax_temperature_fn,
                 known_bounds: Optional[KnownBounds] = None):
        ### Self-Play
        self.action_space_size = action_space_size
        self.num_actors = num_actors

        self.visit_softmax_temperature_fn = visit_softmax_temperature_fn
        self.max_moves = max_moves
        self.num_simulations = num_simulations
        self.discount = discount

        # Root prior exploration noise.
        self.root_dirichlet_alpha = dirichlet_alpha
        self.root_exploration_fraction = 0.25

        # UCB formula
        self.pb_c_base = 19652
        self.pb_c_init = 1.25

        # If we already have some information about which values occur in the
        # environment, we can use them to initialize the rescaling.
        # This is not strictly necessary, but establishes identical behaviour to
        # AlphaZero in board games.
        self.known_bounds = known_bounds

        ### Training
        self.training_steps = int(1000e3)
        self.checkpoint_interval = int(1e3)
        self.window_size = int(1e6)
        self.batch_size = batch_size
        self.num_unroll_steps = 5
        self.td_steps = td_steps

        self.weight_decay = 1e-4
        self.momentum = 0.9

        # Exponential learning rate schedule
        self.lr_init = lr_init
        self.lr_decay_rate = 0.1
        self.lr_decay_steps = lr_decay_steps

    def new_game(self):
        # `Game` is the environment/trajectory wrapper defined elsewhere in
        # the full pseudocode; this excerpt does not include it.
        return Game(self.action_space_size, self.discount)
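

# For context, a minimal sketch of how this config might be built for a
# two-player board game, modeled on the make_board_game_config helper from
# the same MuZero pseudocode. The temperature schedule and hyperparameter
# values below follow that helper but should be treated as illustrative.
def make_board_game_config(action_space_size: int, max_moves: int,
                           dirichlet_alpha: float,
                           lr_init: float) -> MuZeroConfig:

    def visit_softmax_temperature(num_moves, training_steps):
        # Sample moves proportionally to visit counts in the opening,
        # then play greedily (argmax over visit counts) afterwards.
        return 1.0 if num_moves < 30 else 0.0

    return MuZeroConfig(
        action_space_size=action_space_size,
        max_moves=max_moves,
        discount=1.0,
        dirichlet_alpha=dirichlet_alpha,
        num_simulations=800,
        batch_size=2048,
        td_steps=max_moves,  # Board games: use the full Monte Carlo return.
        num_actors=3000,
        lr_init=lr_init,
        lr_decay_steps=400e3,
        visit_softmax_temperature_fn=visit_softmax_temperature,
        known_bounds=KnownBounds(-1, 1))  # Zero-sum game values lie in [-1, 1].


# Example usage, with values matching the paper's chess setup:
# config = make_board_game_config(action_space_size=4672, max_moves=512,
#                                 dirichlet_alpha=0.3, lr_init=0.1)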