Skip to content

Instantly share code, notes, and snippets.

@JKCooper2
Created June 9, 2016 08:30
Show Gist options
  • Save JKCooper2/907ba8d6aa0bdea46448625c5f89cc8c to your computer and use it in GitHub Desktop.
Save JKCooper2/907ba8d6aa0bdea46448625c5f89cc8c to your computer and use it in GitHub Desktop.
Cartpole: for newcomers to RL - Part 1
For Section 1: https://openai.com/requests-for-research/#cartpole
Requirement of environment for algorithm to work:
- Action space has two discrete actions
- A ratio of the observations can decide the best action to take
import gym
import gym.scoreboard.scoring
from random_guess import RandomGuess
def main():
    """Search for a good random linear CartPole agent and report the result.

    A fresh RandomGuess model is drawn for every episode until one reaches
    a score of 200.  From then on that model is kept and re-run while the
    ``best_repeat`` budget lasts, after which the loop stops.  The local
    score is printed, the monitor is closed, and, when ``upload`` is set,
    the results directory is uploaded with the API key read from the first
    line of ``../../../api.txt``.
    """
    env = gym.make('CartPole-v0')
    agent = RandomGuess(env.action_space, env.observation_space)

    upload = True  # Sets whether to upload to OpenAI
    outdir = '/tmp/' + agent.name + '-results'
    env.monitor.start(outdir, force=True)

    episode_count = 10000
    best_repeat = 200  # Times to rerun the agent that hits the max score

    try:
        # range() instead of xrange() so the loop runs on Python 2 and 3.
        for _episode in range(episode_count):
            if agent.best_score < 200:
                # Replace agent if max score hasn't been reached
                agent.create_agent()
            else:
                # Else run the max scoring agent best_repeat number of times
                best_repeat -= 1
                # Break if best_repeat has been run all the times set
                if best_repeat <= 0:
                    break

            ob = env.reset()
            reward = 0
            done = False
            action = agent.act(ob, reward, done)
            while not done:
                ob, reward, done, _ = env.step(action)
                # The call made with done=True is what lets the agent close
                # out its per-episode reward bookkeeping.
                action = agent.act(ob, reward, done)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(gym.scoreboard.scoring.score_from_local(outdir))
    finally:
        # Close the monitor even if an episode raised.
        env.monitor.close()

    if upload:
        # Close the key file deterministically and drop the trailing newline
        # that readline() would otherwise pass through as part of the key.
        with open('../../../api.txt', 'r') as key_file:
            api_key = key_file.readline().strip()
        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=api_key)
# Run the experiment only when this file is executed as a script.
if __name__ == "__main__":
    main()
import numpy as np
class LinearModel:
    """Linear scoring model with randomly drawn weights.

    The model holds one weight per observation component in ``vals`` and
    scores an observation by the dot product of the two vectors.
    """

    def __init__(self, n):
        """Draw ``n`` weights from the standard normal distribution."""
        self.vals = np.random.randn(n)

    def score(self, observation):
        """Return the weighted sum of ``observation`` under this model.

        Args:
            observation: 1-D sequence or array with one entry per weight.

        Returns:
            The dot product of ``observation`` and ``self.vals``.

        Raises:
            ValueError: if ``observation`` does not have exactly one entry
                per weight.
        """
        observation = np.asarray(observation)
        # A length mismatch would otherwise either fail with an opaque
        # IndexError or silently score against only the leading weights.
        if observation.shape != self.vals.shape:
            raise ValueError(
                "observation has shape %s but the model expects %s"
                % (observation.shape, self.vals.shape)
            )
        return np.dot(observation, self.vals)
from linear_model import LinearModel
class RandomGuess:
    """Agent that picks one of two actions from the sign of a linear score.

    ``create_agent`` replaces the current model with a freshly drawn
    LinearModel.  ``act`` accumulates the reward of the running episode and,
    when an episode ends, records the model and score if they beat the best
    seen so far.
    """

    def __init__(self, action_space, observation_space):
        """Store the spaces and reset all bookkeeping.

        Args:
            action_space: stored as given; not read by this class.
            observation_space: object whose ``low`` attribute has one entry
                per observation component (used to size the model).
        """
        self.name = "Random Linear Model"
        self.alg_id = "alg_EaW7vxLcSWW37vPDLpSK4g"

        self.action_space = action_space
        self.observation_space = observation_space

        self.model = None       # model currently used to choose actions
        self.best_model = None  # model that achieved best_score
        self.best_score = 0     # highest episode reward seen so far
        self.episode_reward = 0  # reward accumulated in the running episode

    def create_agent(self):
        """Replace the current model with a new random LinearModel."""
        self.model = LinearModel(len(self.observation_space.low))

    def act(self, observation, reward, done):
        """Choose an action and update the episode bookkeeping.

        Args:
            observation: current observation, passed to the model's score().
            reward: reward to add to the running episode total.
            done: True when the episode has ended with this call.

        Returns:
            0 when the model's score is <= 0, otherwise 1.
        """
        # Create a model on first use so that calling act() before
        # create_agent() does not fail on a None model.
        if self.model is None:
            self.create_agent()

        action = 0 if self.model.score(observation) <= 0 else 1

        self.episode_reward += reward

        if done:
            if self.episode_reward > self.best_score:
                self.best_score = self.episode_reward
                self.best_model = self.model
            # Start the next episode from a clean total.
            self.episode_reward = 0

        return action
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment