Last active
April 29, 2016 20:06
-
-
Save treeform/f27547c7640492f4e45c2e92d116bd95 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Random hill-climbing search for a linear CartPole-v0 policy.
#
# A candidate policy is a list of four weights.  At every step the action is
# 1 when the dot product of the observation and the weights is positive, and
# 0 otherwise.  Each of the 80 rounds perturbs the best weights found so far,
# scores the candidate over 10 episodes, and keeps it if it beats the best
# score.
import gym
import random

env = gym.make('CartPole-v0')
env.monitor.start('/tmp/cartpole-experiment-3', force=True)

# It is simulated-annealing-like: `alpha` scales the random perturbation and
# shrinks as the search progresses.
bestSteps = 0
best = [0, 0, 0, 0]
alpha = 1

# Shared layout for printing a 4-element weight vector.
weights_format = "[%+1.2f %+1.2f %+1.2f %+1.2f]"

try:
    for i_episode in range(80):
        # Perturb every weight by a uniform offset in [-alpha/2, +alpha/2).
        test = [best[i] + (random.random() - 0.5) * alpha for i in range(4)]
        score = 0
        # Key insight: run 10 test episodes per candidate point.
        for ep in range(10):
            observation = env.reset()
            # An episode cannot go over 200 steps, so a candidate has to
            # gain a higher score elsewhere (i.e. across the 10 episodes).
            for t in range(200):
                env.render()
                weighted_sum = sum(observation[i] * test[i] for i in range(4))
                action = 1 if weighted_sum > 0 else 0
                observation, reward, done, info = env.step(action)
                if done:
                    break
            # `t` is the zero-based index of the last step taken, so this is
            # one less than the step count; the offset is the same for every
            # candidate, so the comparison below is unaffected.
            score += t
        if bestSteps < score:
            bestSteps = score
            best = test
            # NOTE(review): the source lost its indentation, so it is unclear
            # whether alpha decays only on improvement (as here) or once per
            # round -- confirm against the original gist.
            alpha *= .9
        # One output line per round: candidate and its score, then the
        # current best, its score, and alpha.
        print("test: %s %s best: %s %s %s" % (
            weights_format % tuple(test), score,
            weights_format % tuple(best), bestSteps, alpha))
    print("best %s %s" % (best, bestSteps))
finally:
    # Always close the monitor, even if the search raises part-way.
    env.monitor.close()
I was able to reproduce the results in 1 of 4 attempts:
141.97 ± 5.43 https://gym.openai.com/evaluations/eval_lzji17hRtiTr9GKJKRClg
9.35 ± 0.08 https://gym.openai.com/evaluations/eval_VChwPSKS0K4SHEs0XRIeQ
9.31 ± 0.08 https://gym.openai.com/evaluations/eval_yvvMBaUESDiaCoZiA4VTqA
199.36 ± 0.34 https://gym.openai.com/evaluations/eval_MIy5Ru6lSKh9RcuNQLz1g
Edit: the middle two runs had env.render() commented out.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Reproduced here: https://gym.openai.com/evaluations/eval_SZdv2vuThCAu6YzikbBg