# learners.py

def update(self, transitions):
    # Override DQN.update() and update_from_episodes() to kick off
    # allreduce communication that synchronizes cumulative steps
    # among all learners.
    #
    # Note: Overriding update()/update_from_episodes() is a quick hack
    # to implement synchronization of cumulative_steps, because PFRL
    # doesn't have a mechanism to hook events in a learner loop.
    # (`step_hook` in pfrl.experiments.train_agent() works well, but
    # `global_step_hook` in pfrl.experiments.train_agent_async() does
    # not, because that hook only runs in actors.)
    # A "correct" implementation would require a learner-loop hook
    # mechanism in PFRL.

    # 'update_counter' exists only in actor-learner mode, so we use it
    # (rather than optim_t) to detect whether training is running in
    # that mode.
    if hasattr(self, 'update_counter'):
        self._sync_cum_steps_hook(None, self, self.optim_t)
    super().update(transitions)

def update_from_episodes(self, episodes, errors_out=None):
    if hasattr(self, 'update_counter'):
        self._sync_cum_steps_hook(None, self, self.optim_t)
    super().update_from_episodes(episodes, errors_out)
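
The gist calls self._sync_cum_steps_hook() but does not show its definition. Below is a minimal sketch of what such a hook could look like, assuming mpi4py for the allreduce and one MPI rank per learner. The hook name comes from the call site above, and its (env, agent, step) signature mirrors PFRL's global_step_hook convention; cumulative_steps is the step counter maintained by PFRL's DQN. The body is an illustrative assumption, not code from the gist.

# A hypothetical _sync_cum_steps_hook, defined on the same agent class
# as the overrides above. Not part of the original gist.
from mpi4py import MPI

def _sync_cum_steps_hook(self, env, agent, step):
    # Agree on the largest cumulative step count seen by any learner,
    # so that step-based schedules (exploration decay, target-network
    # updates, ...) advance consistently across ranks.
    agent.cumulative_steps = MPI.COMM_WORLD.allreduce(
        agent.cumulative_steps, op=MPI.MAX
    )

Taking the maximum is one reasonable aggregation: it keeps the counter monotonic even when learners call update() at different rates. The original gist may combine the counters differently.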