Created
June 9, 2016 08:30
-
-
Save JKCooper2/907ba8d6aa0bdea46448625c5f89cc8c to your computer and use it in GitHub Desktop.
Cartpole: for newcomers to RL - Part 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
For Section 1: https://openai.com/requests-for-research/#cartpole
Requirements of the environment for the algorithm to work:
- The action space has two discrete actions
- A ratio of the observations can decide the best action to take
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym | |
import gym.scoreboard.scoring | |
from random_guess import RandomGuess | |
def main():
    """Random-search CartPole-v0 for a max-scoring policy, then upload.

    A new random model is created before every episode until the agent's
    best score reaches ``max_score``.  From then on the current model is
    re-run, and the loop stops once it has been repeated ``best_repeat``
    times or ``episode_count`` episodes have been played.  The monitor
    results written to ``outdir`` are scored locally, printed, and (when
    ``upload`` is true) uploaded with the API key read from
    ``../../../api.txt``.
    """
    max_score = 200  # Search stops replacing the model at this best score

    env = gym.make('CartPole-v0')
    agent = RandomGuess(env.action_space, env.observation_space)

    upload = True  # Sets whether to upload to OpenAI
    outdir = '/tmp/' + agent.name + '-results'
    env.monitor.start(outdir, force=True)

    episode_count = 10000
    best_repeat = 200  # Times to rerun the agent that hits the max score

    # range() and print() are used so the script runs on Python 2 and 3.
    for episode in range(episode_count):
        if agent.best_score < max_score:
            # Replace agent if max score hasn't been reached
            agent.create_agent()
        else:
            # Else run the max scoring agent best_repeat number of times
            best_repeat -= 1
            # Break if best_repeat has been run all the times set
            if best_repeat <= 0:
                break

        ob = env.reset()
        reward = 0
        done = False

        # The agent is also called on the terminal observation so it can
        # close out its own per-episode bookkeeping.
        action = agent.act(ob, reward, done)
        while not done:
            ob, reward, done, _info = env.step(action)
            action = agent.act(ob, reward, done)

    print(gym.scoreboard.scoring.score_from_local(outdir))
    env.monitor.close()

    if upload:
        # Close the key file deterministically and drop the trailing newline
        # so only the key itself is sent.
        with open('../../../api.txt', 'r') as key_file:
            api_key = key_file.readline().strip()
        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=api_key)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class LinearModel:
    """Linear scoring function whose weights are drawn at random."""

    def __init__(self, n):
        """Create ``n`` weights sampled with ``np.random.randn``."""
        self.vals = np.random.randn(n)

    def score(self, observation):
        """Return the sum of each observation entry times its weight.

        Entry ``k`` of ``observation`` is multiplied by ``self.vals[k]``.
        An empty observation yields the integer ``0``; an observation with
        more entries than there are weights raises ``IndexError``.
        """
        total = 0
        for position, feature in enumerate(observation):
            total += feature * self.vals[position]
        return total
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from linear_model import LinearModel | |
class RandomGuess:
    """Agent that acts on the sign of a randomly generated linear score.

    The agent keeps the best total episode reward seen so far together with
    the model that achieved it.  ``create_agent`` must be called before
    ``act`` because ``model`` starts out as ``None``.
    """

    def __init__(self, action_space, observation_space):
        """Store the environment spaces and reset all bookkeeping."""
        self.name = "Random Linear Model"
        self.alg_id = "alg_EaW7vxLcSWW37vPDLpSK4g"

        self.action_space = action_space
        self.observation_space = observation_space

        # Model currently in use, and the best one found so far.
        self.model = None
        self.best_model = None

        # Highest finished-episode reward, and the running total of the
        # episode in progress.
        self.best_score = 0
        self.episode_reward = 0

    def create_agent(self):
        """Replace the current model with a fresh random ``LinearModel``.

        The model gets one weight per entry of ``observation_space.low``.
        """
        n_weights = len(self.observation_space.low)
        self.model = LinearModel(n_weights)

    def act(self, observation, reward, done):
        """Pick an action and account for ``reward``.

        Returns ``0`` when the model's score for ``observation`` is
        ``<= 0`` and ``1`` otherwise.  ``reward`` is added to the running
        episode total; when ``done`` is true the total is compared against
        ``best_score`` (recording the current model on a strict
        improvement) and then reset to ``0``.
        """
        action = 0 if self.model.score(observation) <= 0 else 1

        self.episode_reward += reward
        if not done:
            return action

        # Episode finished: keep this model only if it beat the best score.
        if self.episode_reward > self.best_score:
            self.best_score = self.episode_reward
            self.best_model = self.model
        self.episode_reward = 0
        return action
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment