04_CartPole-reinforcement-learning_e_greedy_D3QN
import random
import numpy as np

# Method of the DQN agent class (the rest of the class is not shown in this gist)
def act(self, state, decay_step):
    # EPSILON GREEDY STRATEGY
    if self.epsilon_greedy:
        # Improved epsilon-greedy strategy for Q-learning:
        # decay exploration exponentially toward epsilon_min with the global step count
        explore_probability = self.epsilon_min + (self.epsilon - self.epsilon_min) * np.exp(-self.epsilon_decay * decay_step)
    # OLD EPSILON STRATEGY
    else:
        if self.epsilon > self.epsilon_min:
            self.epsilon *= (1 - self.epsilon_decay)
        explore_probability = self.epsilon

    if explore_probability > np.random.rand():
        # Take a random action (exploration)
        return random.randrange(self.action_size), explore_probability
    else:
        # Get action from the Q-network (exploitation):
        # estimate the Q-values for the current state and
        # take the biggest Q-value (= the best action)
        return np.argmax(self.model.predict(state)), explore_probability
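Since the two branches implement different decay schedules, it can help to see how they behave over training steps. The standalone sketch below compares them; the hyperparameter values (EPSILON, EPSILON_MIN, EPSILON_DECAY) are illustrative assumptions, not values taken from the original gist.

import numpy as np

EPSILON = 1.0           # initial exploration probability (assumed)
EPSILON_MIN = 0.01      # exploration floor (assumed)
EPSILON_DECAY = 0.0005  # decay rate (assumed)

def improved_schedule(decay_step):
    # Exponential decay toward EPSILON_MIN, as in the epsilon_greedy branch:
    # a closed-form function of the total step count
    return EPSILON_MIN + (EPSILON - EPSILON_MIN) * np.exp(-EPSILON_DECAY * decay_step)

def old_schedule(steps):
    # Multiplicative decay, as in the else branch: epsilon shrinks once per
    # call to act() and stops decaying once it reaches the floor
    epsilon = EPSILON
    for _ in range(steps):
        if epsilon > EPSILON_MIN:
            epsilon *= (1 - EPSILON_DECAY)
    return epsilon

for step in (0, 1000, 5000, 10000):
    print(f"step {step:>6}: improved={improved_schedule(step):.4f}, old={old_schedule(step):.4f}")

The main practical difference: the improved schedule is stateless (it depends only on decay_step, so it is reproducible across restarts), while the old schedule mutates self.epsilon on every call and clamps at the floor rather than approaching it asymptotically.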