Created
July 2, 2016 02:57
-
-
Save klosowsk/0e09b2d84bffbaa21d633cc0b60a0987 to your computer and use it in GitHub Desktop.
Cart-Pole solved after 4 episodes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CartPole-v0
# A pole is attached by an un-actuated joint to a cart,
# which moves along a frictionless track. The system
# is controlled by applying a force of +1 or -1 to the
# cart. The pendulum starts upright, and the goal is to
# prevent it from falling over. A reward of +1 is provided
# for every timestep that the pole remains upright.
# The episode ends when the pole is more than 15 degrees
# from vertical, or the cart moves more than 2.4 units from
# the center.
import gym
import numpy
# Directory the gym monitor writes its recordings to.
MONITOR_DIR = '/tmp/OpenAI-CartPole'
# Number of training episodes to play.
NUM_EPISODES = 1000
# An episode that is not a new best and scores below this value has its
# final observation subtracted from the weights.
# NOTE(review): 200 looks like the per-episode reward cap of CartPole-v0 --
# confirm against the environment spec.
REWARD_THRESHOLD = 200


def choose_action(param, observation):
    """Pick the action of a linear policy.

    Args:
        param: weight vector of the policy.
        observation: current environment observation (same length as param).

    Returns:
        1 when the dot product of ``param`` and ``observation`` is strictly
        positive, otherwise 0.
    """
    return 1 if numpy.dot(param, observation) > 0 else 0


def run_episode(env, param):
    """Play one episode with the linear policy defined by ``param``.

    Args:
        env: environment exposing ``reset()`` and ``step(action)``, where
            ``step`` returns ``(observation, reward, done, info)``.
        param: weight vector of the policy.

    Returns:
        Tuple ``(total_reward, observation)``: the summed reward of the
        episode and the last observation returned by ``env.step``.
    """
    observation = env.reset()
    total_reward = 0
    done = False
    while not done:
        action = choose_action(param, observation)
        observation, reward, done, _info = env.step(action)
        total_reward += reward
    return total_reward, observation


def update_param(param, observation, total_reward, best_reward):
    """Adjust the weights in place after an episode.

    A new best episode adds the final observation to the weights; any other
    episode scoring below ``REWARD_THRESHOLD`` subtracts it; otherwise the
    weights are left untouched.

    Args:
        param: weight vector, modified in place.
        observation: final observation of the episode.
        total_reward: summed reward of the episode just played.
        best_reward: best summed reward seen before this episode.

    Returns:
        The best summed reward seen so far, including this episode.
    """
    if total_reward > best_reward:
        param += observation
        return total_reward
    if total_reward < REWARD_THRESHOLD:
        param -= observation
    return best_reward


def main(episodes=NUM_EPISODES, monitor_dir=MONITOR_DIR):
    """Train a linear CartPole-v0 policy while the gym monitor records.

    Args:
        episodes: number of episodes to play.
        monitor_dir: directory passed to ``env.monitor.start``.
    """
    env = gym.make('CartPole-v0')
    # Start from a random weight vector of 4 values in [-0.5, 0.5).
    param = numpy.random.rand(4) - 0.5
    # Start the monitor; force=True is passed through to the monitor as in
    # the original script.
    env.monitor.start(monitor_dir, force=True)
    try:
        best_reward = 0
        for _ in range(episodes):
            total_reward, observation = run_episode(env, param)
            best_reward = update_param(
                param, observation, total_reward, best_reward)
    finally:
        # Close the monitor even if an episode raises, so it is not leaked.
        env.monitor.close()


if __name__ == '__main__':
    main()
# Code based on the code posted by @ilovelinux
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment