3. Code inside "A formal introduction to Deep Reinforcement Learning"
state = get_first_state()
done = False
steps = 0
while not done:
    # epsilon-greedy policy: explore with probability epsilon, otherwise exploit
    if random() < epsilon:  # random() draws uniformly from [0, 1]
        action = random_action()
    else:
        action = argmax(nn.predict(state))
    next_state, reward, done = step(action)  # perform the action
    replay_memory.put(state, action, reward, done, next_state)  # save the transition in replay memory
    if len(replay_memory) >= batch_size:  # train only once enough transitions are stored
        # get a mini-batch sampled at random from the replay memory
        for state_exp, action_exp, reward_exp, done_exp, next_state_exp in replay_memory.get_random(batch_size):
            q_values = nn.predict(state_exp)  # current Q-values for the stored state
            q_values_target = copy(q_values)
            # compute the target Q-value with the target network
            if done_exp:
                q_values_target[action_exp] = reward_exp
            else:
                q_values_target[action_exp] = reward_exp + discount_factor * max(target_nn.predict(next_state_exp))
            nn.train(state_exp, q_values_target, learning_rate, momentum)  # optimization step on the main network
    steps += 1
    if steps % steps_to_sync_target_nn == 0:
        sync_target_nn_weights()  # sync target nn with the main nn
    epsilon *= epsilon_decay  # epsilon-decay schedule
    if epsilon < min_epsilon:
        epsilon = min_epsilon
    state = next_state  # set the new current state
sync_target_nn_weights()  # sync target nn with the main nn one final time at the end
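
The loop above assumes a replay_memory object with put/get_random and a sync_target_nn_weights helper, neither of which is defined in the snippet. A minimal sketch of what they could look like is given below; the fixed-capacity deque, uniform sampling, and the Keras-style get_weights/set_weights interface on nn and target_nn are assumptions for illustration, not part of the original gist.

import random
from collections import deque

class ReplayMemory:
    # fixed-capacity buffer of (state, action, reward, done, next_state) transitions
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)  # oldest transitions are dropped automatically

    def put(self, state, action, reward, done, next_state):
        self.buffer.append((state, action, reward, done, next_state))

    def get_random(self, batch_size):
        # uniform sampling without replacement, as used in the training loop
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)

def sync_target_nn_weights():
    # copy the main network's parameters into the target network
    # (assumes Keras-like get_weights/set_weights methods on both networks)
    target_nn.set_weights(nn.get_weights())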