Last active
May 7, 2016 02:22
-
-
Save nottombrown/30607018c912422ffdab2425b0b72768 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A "put it in a dictionary" Q-learning agent
"""Tabular Q-learning agent for the OpenAI Gym ``FrozenLake-v0`` environment.

Run as a script to train the agent for ``NUM_EPISODES`` episodes while the
Gym monitor records the run into the ``recordings`` directory.
"""
import numpy as np

# Probability of taking a random action at the start of training.
EPSILON_START = 0.5
# Multiplicative decay applied to epsilon.
EPSILON_DECAY = 0.98
# Learning rate.
ALPHA = 0.1
# Number of training episodes.
NUM_EPISODES = 5000
# Cost of life: subtracted from the environment reward on every step.
STEP_PENALTY = 0.001


def choose_action(q_table, state, epsilon, sample_action, rand=np.random.rand):
    """Pick an action epsilon-greedily.

    Args:
        q_table: 2-D array indexed as ``q_table[state, action]``.
        state: Index of the current state.
        epsilon: Probability of taking a random action.
        sample_action: Zero-argument callable returning a random action.
        rand: Zero-argument callable returning a uniform draw in [0, 1).

    Returns:
        The greedy action for ``state`` when the draw exceeds ``epsilon``,
        otherwise whatever ``sample_action()`` returns.
    """
    if rand() > epsilon:
        # Choose best action according to current Q matrix
        return np.argmax(q_table[state, :])
    # Take a random action
    return sample_action()


def q_update(q_table, state, action, reward, next_state, alpha, gamma=1.0):
    """Apply one Q-learning update to ``q_table`` in place.

    Args:
        q_table: 2-D array indexed as ``q_table[state, action]``.
        state: State the action was taken from.
        action: Action that was taken.
        reward: Reward received for the transition.
        next_state: State reached after the action.
        alpha: Learning rate.
        gamma: Discount factor; the default of 1.0 means no discounting.
    """
    target = reward + gamma * np.max(q_table[next_state, :])
    q_table[state, action] += alpha * (target - q_table[state, action])


def run_episode(env, q_table, epsilon, alpha, step_penalty=STEP_PENALTY):
    """Play one episode, updating ``q_table`` in place after every step.

    Args:
        env: Environment providing ``reset()``, ``step(action)`` returning
            ``(observation, reward, done, info)``, and
            ``action_space.sample()``.
        q_table: 2-D array indexed as ``q_table[state, action]``.
        epsilon: Probability of taking a random action.
        alpha: Learning rate.
        step_penalty: Amount subtracted from every step's reward.
    """
    observation = env.reset()
    done = False
    while not done:
        # Current state
        state = observation
        action = choose_action(q_table, state, epsilon, env.action_space.sample)
        # Take action and observe state and reward
        observation, reward, done, _info = env.step(action)
        # Cost of life. The earlier `reward -= -0.001` added a bonus instead.
        reward -= step_penalty
        q_update(q_table, state, action, reward, observation, alpha)


def main():
    """Train the agent on FrozenLake-v0 and return the learned Q matrix."""
    # Imported here so the helpers above stay importable without gym.
    import gym

    env = gym.make('FrozenLake-v0')
    env.monitor.start('recordings', force=True)
    try:
        # Initialize Q matrix to zeros
        q_table = np.zeros((env.observation_space.n, env.action_space.n))
        epsilon = EPSILON_START
        for _ in range(NUM_EPISODES):
            run_episode(env, q_table, epsilon, ALPHA)
            # Decay epsilon
            # NOTE(review): decay assumed to be once per episode; the
            # original indentation was lost -- confirm.
            epsilon *= EPSILON_DECAY
    finally:
        # Close the monitor even if training raises.
        env.monitor.close()
    return q_table


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi
I tried to reproduce your result on my computer, but the result I got is different from what I expected. I ran the simulation twice and uploaded the results to OpenAI Gym. Do you happen to know why I failed to reproduce your result? I just ran your code without any modification.
1st try to reproduce your result: https://gym.openai.com/evaluations/eval_Os9pXNZSbO7BK96YOPIA
2nd try to reproduce your result: https://gym.openai.com/evaluations/eval_S0RqhoOySDKO9KLCbEXyqQ
Best regards