Dueling Q Network training
# env, brain_name, state_size, action_size, num_iterations, max_timesteps,
# EPS, EPS_DECAY, and EPS_LIMIT are assumed to be defined earlier in the notebook.
dqn_agent = Agent(state_size, action_size, 1024)  # third argument is the RNG seed
scores, eps = [], EPS  # eps controls epsilon-greedy exploration (was misnamed "discount")

for ite in range(1, num_iterations + 1):
    # Reset the environment and read the first observation of the episode
    env_info = env.reset(train_mode=True)[brain_name]
    state, score = env_info.vector_observations[0], 0

    for t_step in range(max_timesteps):
        # Select an epsilon-greedy action and advance the environment one step
        action = dqn_agent.act(state, eps)
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations[0]
        reward, done = env_info.rewards[0], env_info.local_done[0]

        # Store the experience and let the agent learn from replay
        dqn_agent.step(state, action, reward, next_state, done)
        score, state = score + reward, next_state
        if done:
            break

    scores.append(score)
    eps = max(EPS_LIMIT, EPS_DECAY * eps)  # decay exploration, floored at EPS_LIMIT
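The gist shows only the training loop; the dueling architecture itself lives inside Agent. For reference, here is a minimal sketch of a dueling Q-network head (Wang et al., 2016) in PyTorch. The class name, layer sizes, and single hidden feature layer are illustrative assumptions, not taken from this gist; only the value/advantage decomposition is standard.

# Illustrative sketch of the dueling head assumed inside Agent; not from this gist.
import torch
import torch.nn as nn

class DuelingQNetwork(nn.Module):
    def __init__(self, state_size, action_size, hidden=64):
        super().__init__()
        self.feature = nn.Sequential(nn.Linear(state_size, hidden), nn.ReLU())
        self.value = nn.Linear(hidden, 1)                # state-value stream V(s)
        self.advantage = nn.Linear(hidden, action_size)  # advantage stream A(s, a)

    def forward(self, state):
        x = self.feature(state)
        v, a = self.value(x), self.advantage(x)
        # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); subtracting the mean
        # advantage keeps the V/A split identifiable
        return v + a - a.mean(dim=1, keepdim=True)

Without the mean subtraction, any constant could shift between the value and advantage streams without changing Q, so the two streams would not be separately identifiable.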