# learners.py

def update(self, transitions):
    # Override DQN.update() and update_from_episodes() to kick off
    # allreduce communication that synchronizes cumulative steps
    # among all learners.
    #
    # Note: Overriding update()/update_from_episodes() is a quick hack
    # to implement synchronization of cumulative_steps, because PFRL
    # doesn't have a mechanism to hook events in a learner loop.
    # (`step_hook` in pfrl.experiments.train_agent() works well, but
    # `global_step_hook` in pfrl.experiments.train_agent_async() does
    # not, because that hook only runs in actors.)
    # A "correct" implementation would require a learner-loop hook
    # mechanism in PFRL.

    # 'update_counter' exists only in actor-learner mode, so we use it
    # (rather than optim_t) to detect whether training is running in
    # that mode.
    if hasattr(self, 'update_counter'):
        self._sync_cum_steps_hook(None, self, self.optim_t)
    super().update(transitions)

def update_from_episodes(self, episodes, errors_out=None):
    if hasattr(self, 'update_counter'):
        self._sync_cum_steps_hook(None, self, self.optim_t)
    super().update_from_episodes(episodes, errors_out)
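
The gist calls self._sync_cum_steps_hook() but does not show its definition. Below is a minimal sketch of what such a hook could look like, assuming mpi4py for the allreduce and one MPI rank per learner. The hook name comes from the call site above, and its (env, agent, step) signature mirrors PFRL's global_step_hook convention; cumulative_steps is the step counter maintained by PFRL's DQN. The body is an illustrative assumption, not code from the gist.

# A hypothetical _sync_cum_steps_hook, defined on the same agent class
# as the overrides above. Not part of the original gist.
from mpi4py import MPI

def _sync_cum_steps_hook(self, env, agent, step):
    # Agree on the largest cumulative step count seen by any learner,
    # so that step-based schedules (exploration decay, target-network
    # updates, ...) advance consistently across ranks.
    agent.cumulative_steps = MPI.COMM_WORLD.allreduce(
        agent.cumulative_steps, op=MPI.MAX
    )

Taking the maximum is one reasonable aggregation: it keeps the counter monotonic even when learners call update() at different rates. The original gist may combine the counters differently.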