Last active
January 20, 2017 23:51
-
-
Save wiso/3518c9574e59022d8e628a8e5df4a247 to your computer and use it in GitHub Desktop.
Very simple random search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# from http://kvfrans.com/simple-algoritms-for-solving-cartpole/ | |
import gym | |
from gym import wrappers | |
import numpy as np | |
# Create the CartPole-v0 environment through gym's registry.
env = gym.make('CartPole-v0') | |
def run_episode(env, parameters, max_steps=2000):
    """Run one episode with a linear threshold policy and return its reward.

    At every step the action is 0 when the product of ``parameters`` and the
    current observation is negative, and 1 otherwise.

    Args:
        env: Environment exposing ``reset()`` (returns an observation) and
            ``step(action)`` (returns ``(observation, reward, done, info)``).
        parameters: Weight vector multiplied with each observation.
        max_steps: Upper bound on the number of steps taken. The episode
            also stops as soon as the environment reports ``done``.

    Returns:
        The sum of the rewards collected during the episode.
    """
    observation = env.reset()
    totalreward = 0
    # ``range`` instead of the Python-2-only ``xrange`` keeps this function
    # runnable on both Python 2 and Python 3.
    for _ in range(max_steps):
        action = 0 if np.matmul(parameters, observation) < 0 else 1
        observation, reward, done, _info = env.step(action)
        totalreward += reward
        if done:
            break
    return totalreward
# Random search: repeatedly sample a weight vector uniformly from [-1, 1)^4,
# score it by its mean reward over a few episodes, and keep the best one.
parameters = np.random.rand(4) * 2 - 1  # replaced by a fresh sample each iteration
bestparams = None
bestreward = 0
episodes_per_update = 5
# CartPole-v0 episodes are capped at 200 steps with a reward of 1 per step,
# so 200 is the highest reachable mean reward.  The previous threshold of
# 2000 could never be met, which forced all 10000 iterations to run.
solved_reward = 200
for _ in range(10000):
    parameters = np.random.rand(4) * 2 - 1
    # Sum first and divide once to avoid accumulating rounding error from
    # repeated fractional additions.
    total = 0.0
    for _ in range(episodes_per_update):
        total += run_episode(env, parameters)
    reward = total / episodes_per_update
    if reward > bestreward:
        bestreward = reward
        bestparams = parameters
        # Stop as soon as a candidate achieves the maximum possible score.
        if reward >= solved_reward:
            break
print(bestreward)
# Replay the best weights found by the search for 100 episodes, recording the
# run with gym's Monitor wrapper under /tmp/cartpole-experiment-1.
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-experiment-1', force=True)
try:
    for i_episode in range(100):
        print(i_episode)
        observation = env.reset()
        for _ in range(1000):
            # Uncomment to watch the episode while it runs.
            #env.render()
            # Act with ``bestparams`` (the best weights found), not
            # ``parameters``, which only holds the last random sample tried.
            action = 0 if np.matmul(bestparams, observation) < 0 else 1
            observation, reward, done, info = env.step(action)
            if done:
                break
finally:
    # Always close the monitored environment, even if an episode raises.
    env.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment