Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save pranavkantgaur/d668cd1d3113ae72f4f705a14d675357 to your computer and use it in GitHub Desktop.
Save pranavkantgaur/d668cd1d3113ae72f4f705a14d675357 to your computer and use it in GitHub Desktop.
annealed epsilon greedy agent for CartPole problem
'''Implements an annealed epsilon-greedy ML agent for the CartPole problem.'''
import numpy as np
import gym
class myAnnealedEpsilonGreedyCartPoleAgent(object):
    """Epsilon-greedy agent whose exploration rate is annealed over a episode's steps.

    With probability ``epsilon`` the agent samples a random action from the
    action space (explore); otherwise it returns the fixed action ``1``
    (exploit), which is a placeholder until a learned policy is plugged in.
    """

    def __init__(self, action_space):
        """Remember the action space used for exploratory sampling.

        Args:
            action_space: the environment's action space; must be a
                ``gym.spaces.discrete.Discrete`` instance.

        Raises:
            AssertionError: if ``action_space`` is not a discrete space.
        """
        # NOTE: the previous form `assert (action_space, Discrete), msg`
        # asserted a non-empty tuple, which is always truthy, so the check
        # could never fail. `isinstance` performs the intended validation.
        assert isinstance(action_space, gym.spaces.discrete.Discrete), \
            "unsupported action space for now."
        self.action_space = action_space

    def act(self, state, reward, time_step, done, default_epsilon=0.98):
        """Pick the action for the current step.

        Args:
            state: current observation (not used by the placeholder policy).
            reward: reward received for the previous step (not used).
            time_step: zero-based step index; drives the annealing schedule.
            done: episode-termination flag (not used).
            default_epsilon: exploration probability at ``time_step == 0``.

        Returns:
            A random sample from the action space when exploring, otherwise
            the constant action ``1``.
        """
        # Annealing schedule: epsilon decays hyperbolically, eps0 / (t + 1),
        # so exploration is frequent early on and rare at later steps.
        current_epsilon = default_epsilon / (time_step + 1.0)

        # Draw from U[0, 1) and explore with probability current_epsilon.
        if np.random.random() < current_epsilon:
            return self.action_space.sample()  # explore

        # exploit -- TODO: replace with a policy-gradient based agent model
        return 1
def main():
    """Run the annealed epsilon-greedy agent on CartPole-v0 and report progress.

    Plays ``max_episodes`` episodes of at most ``max_steps_per_episodes``
    steps each, rendering every step, and prints an exponentially smoothed
    running total of the per-episode reward after each episode.
    """
    # instantiate environment
    env = gym.make('CartPole-v0')
    # instantiate agent
    agent = myAnnealedEpsilonGreedyCartPoleAgent(env.action_space)

    # task-specific configuration
    max_episodes = 500
    max_steps_per_episodes = 200
    sum_reward_running = 0.0

    try:
        # `range` and single-argument `print(...)` behave the same on
        # Python 2 and Python 3, unlike `xrange` / the print statement.
        for i in range(max_episodes):
            current_observation = env.reset()
            sum_rewards = 0.0
            last_reward = 0.0
            # Reset per episode so the agent's first call in a new episode
            # does not receive the previous episode's terminal flag.
            done = False

            for j in range(max_steps_per_episodes):
                # agent is invoked
                action = agent.act(current_observation, last_reward, j, done)
                env.render()
                # environment is acted upon
                next_observation, current_reward, done, _ = env.step(action)
                sum_rewards += current_reward
                if done:
                    break
                current_observation = next_observation
                last_reward = current_reward

            # exponential moving average of the episode reward totals
            sum_reward_running = 0.95 * sum_reward_running + 0.05 * sum_rewards
            print('%d running reward: %d' % (i, sum_reward_running))
    finally:
        # Release the environment (and its render window) even on error
        # or keyboard interrupt.
        env.close()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment