Skip to content

Instantly share code, notes, and snippets.

@ravishchawla
Last active December 6, 2019 16:30
Show Gist options
  • Save ravishchawla/e4a3222fe20440ef4f60b5d57aa6176d to your computer and use it in GitHub Desktop.
Save ravishchawla/e4a3222fe20440ef4f60b5d57aa6176d to your computer and use it in GitHub Desktop.
def multi_ddpg(n_episodes=5000, max_t=2000):
    """Train the shared agent on the multi-agent environment and return scores.

    Relies on names defined elsewhere in this file: ``env``, ``agent``,
    ``brain_name`` and ``num_agents``.

    Each episode resets the environment and the agent, then steps up to
    ``max_t`` times (stopping early as soon as any agent reports done).
    The episode score is the maximum of the per-agent accumulated rewards.
    A rolling mean over the last 100 episode scores is tracked; once more
    than 100 episodes have run and that mean is at least 0.5, the agent is
    saved via ``agent.save(i_episode)`` every time the mean sets a new best.
    Training is not stopped at that point; it runs for all ``n_episodes``.

    Args:
        n_episodes: Number of training episodes to run.
        max_t: Maximum number of environment steps per episode.

    Returns:
        A list with one entry per episode: the best per-agent score of
        that episode.
    """
    init_time = time.time()
    scores_deque = deque(maxlen=100)  # window for the rolling mean
    scores = []
    # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
    max_score = -np.inf

    for i_episode in range(1, n_episodes + 1):
        ep_init_time = time.time()
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations
        agent.reset()
        agent_scores = np.zeros(num_agents)

        for _ in range(max_t):
            actions = agent.act(states)
            next_env_info = env.step(actions)[brain_name]
            next_states = next_env_info.vector_observations
            rewards = next_env_info.rewards
            dones = next_env_info.local_done
            agent.step(states, actions, rewards, next_states, dones)
            agent_scores = agent_scores + rewards
            states = next_states
            # The episode ends for everyone as soon as any agent is done.
            if np.any(dones):
                break

        best_score = np.max(agent_scores)
        scores_deque.append(best_score)
        scores.append(best_score)
        epi_score = np.mean(scores_deque)

        def report(template):
            # Fill in the shared status fields and emit the line immediately.
            now = time.time()
            sys.stdout.write(template % (
                i_episode,
                now - ep_init_time,
                now - init_time,
                ['%.2f' % sc for sc in agent_scores],
                epi_score,
            ))
            # Flush so the '\r' progress line is visible on buffered streams.
            sys.stdout.flush()

        report('\rEpisode %d\t Done in %.2f/%.2f\t Scores: %s\tAvg Score %.2f')

        if i_episode > 100 and epi_score >= 0.5:
            if epi_score > max_score:
                max_score = epi_score
                agent.save(i_episode)
                # NOTE(review): the original indentation was lost, so it is
                # unclear whether this message belongs here (printed only on
                # a new best checkpoint) or one level out (printed on every
                # episode once solved) -- confirm against the author's intent.
                report('\nEnvironment Solved: %d\t Done in %.2f/%.2f\t Scores: %s\tAvg Score %.2f')

    return scores
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment