JKCooper2 · June 9, 2016 08:30
diff --git a/README b/README
 For Section 1: https://openai.com/requests-for-research/#cartpole

 Requirement of environment for algorithm to work:
 - Action space has two discrete actions
 - The sign of a weighted sum of the observations can decide the best action to take
diff --git a/environment.py b/environment.py
 import gym
 import gym.scoreboard.scoring
 from random_guess import RandomGuess


 def main():
    """Run the random-guess agent on CartPole-v0 and optionally upload the run.

    A monitor records every episode under ``/tmp/<agent name>-results``.
    Until the agent's best score reaches ``max_score`` a fresh random model
    is drawn before each episode; afterwards the current model is kept and
    ``best_repeat`` counts down once per loop iteration.  The loop ends when
    that counter reaches zero or after ``episode_count`` iterations.

    When ``upload`` is true the recorded results are uploaded with the API
    key read from the first line of ``../../../api.txt``.
    """
    env = gym.make('CartPole-v0')
    agent = RandomGuess(env.action_space, env.observation_space)
    upload = True  # Sets whether to upload to OpenAI

    outdir = '/tmp/' + agent.name + '-results'
    env.monitor.start(outdir, force=True)

    episode_count = 10000
    max_score = 200  # Score at which the search for a new model stops
    best_repeat = 200  # Times to rerun the agent that hits the max score

    # Close the monitor even if an episode raises, so recorded results are
    # flushed and the environment is released.
    try:
        for _episode in range(episode_count):

            # Replace agent if max score hasn't been reached
            if agent.best_score < max_score:
                agent.create_agent()
            # Else run the max scoring agent best_repeat number of times
            else:
                best_repeat -= 1

            # Break if best_repeat has been run all the times set.
            # NOTE: the counter is decremented before this check, so the
            # iteration that brings it to zero exits without running.
            if best_repeat <= 0:
                break

            ob = env.reset()
            reward = 0
            done = False
            action = agent.act(ob, reward, done)

            while not done:
                ob, reward, done, _ = env.step(action)
                # The final call (done=True) lets the agent record the
                # episode total and reset its running reward.
                action = agent.act(ob, reward, done)

            print(gym.scoreboard.scoring.score_from_local(outdir))
    finally:
        env.monitor.close()

    if upload:
        # readline() keeps the trailing newline; strip it so the key is sent
        # clean, and use a context manager so the file handle is closed.
        with open('../../../api.txt', 'r') as key_file:
            api_key = key_file.readline().strip()
        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=api_key)


 # Run the experiment only when this file is executed as a script,
 # not when it is imported.
 if __name__ == '__main__':
    main()
diff --git a/linear_model.py b/linear_model.py
 import numpy as np


 class LinearModel:
    """Linear scoring function whose weights are drawn at random.

    ``vals`` holds ``n`` weights produced by ``np.random.randn(n)``.
    """

    def __init__(self, n):
        weights = np.random.randn(n)
        self.vals = weights

    def score(self, observation):
        """Return the weighted sum of ``observation`` using ``self.vals``.

        Entry ``i`` of ``observation`` is multiplied by ``self.vals[i]`` and
        the products are added up in index order, starting from 0.
        """
        total = 0
        for idx in range(len(observation)):
            total = total + observation[idx] * self.vals[idx]
        return total
diff --git a/random_guess.py b/random_guess.py
 from linear_model import LinearModel


 class RandomGuess:
    """Agent that acts by thresholding the score of a random linear model.

    ``create_agent`` installs a fresh ``LinearModel``; ``act`` picks action 0
    or 1 from the sign of that model's score and tracks the best episode
    total seen so far in ``best_score`` / ``best_model``.
    """

    def __init__(self, action_space, observation_space):
        # Identification used by the driver script (output dir and upload).
        self.name = "Random Linear Model"
        self.alg_id = "alg_EaW7vxLcSWW37vPDLpSK4g"

        self.action_space = action_space
        self.observation_space = observation_space

        # Running reward of the current episode and the best total so far.
        self.episode_reward = 0
        self.best_score = 0

        # No model exists until create_agent() is called.
        self.model = None
        self.best_model = None

    def create_agent(self):
        """Replace the current model with a newly drawn ``LinearModel``."""
        n_features = len(self.observation_space.low)
        self.model = LinearModel(n_features)

    def act(self, observation, reward, done):
        """Accumulate ``reward`` and return the action for ``observation``.

        Returns 0 when the model's score is <= 0, otherwise 1.  When ``done``
        is true the episode total is compared against ``best_score`` (and
        recorded if higher) before the running total is reset to 0.
        """
        action = 0 if self.model.score(observation) <= 0 else 1

        self.episode_reward += reward

        if not done:
            return action

        # Episode finished: keep the model if it beat the previous best.
        if self.episode_reward > self.best_score:
            self.best_score = self.episode_reward
            self.best_model = self.model
        self.episode_reward = 0

        return action
	For Section 1: https://openai.com/requests-for-research/#cartpole

	Requirement of environment for algorithm to work:
	- Action space has two discrete actions
	- The sign of a weighted sum of the observations can decide the best action to take
	import gym
	import gym.scoreboard.scoring
	from random_guess import RandomGuess


	def main():
	    """Run the random-guess agent on CartPole-v0 and optionally upload the run.

	    A monitor records every episode under ``/tmp/<agent name>-results``.
	    Until the agent's best score reaches ``max_score`` a fresh random model
	    is drawn before each episode; afterwards the current model is kept and
	    ``best_repeat`` counts down once per loop iteration.  The loop ends when
	    that counter reaches zero or after ``episode_count`` iterations.

	    When ``upload`` is true the recorded results are uploaded with the API
	    key read from the first line of ``../../../api.txt``.
	    """
	    env = gym.make('CartPole-v0')
	    agent = RandomGuess(env.action_space, env.observation_space)
	    upload = True  # Sets whether to upload to OpenAI

	    outdir = '/tmp/' + agent.name + '-results'
	    env.monitor.start(outdir, force=True)

	    episode_count = 10000
	    max_score = 200  # Score at which the search for a new model stops
	    best_repeat = 200  # Times to rerun the agent that hits the max score

	    # Close the monitor even if an episode raises, so recorded results are
	    # flushed and the environment is released.
	    try:
	        for _episode in range(episode_count):

	            # Replace agent if max score hasn't been reached
	            if agent.best_score < max_score:
	                agent.create_agent()
	            # Else run the max scoring agent best_repeat number of times
	            else:
	                best_repeat -= 1

	            # Break if best_repeat has been run all the times set.
	            # NOTE: the counter is decremented before this check, so the
	            # iteration that brings it to zero exits without running.
	            if best_repeat <= 0:
	                break

	            ob = env.reset()
	            reward = 0
	            done = False
	            action = agent.act(ob, reward, done)

	            while not done:
	                ob, reward, done, _ = env.step(action)
	                # The final call (done=True) lets the agent record the
	                # episode total and reset its running reward.
	                action = agent.act(ob, reward, done)

	            print(gym.scoreboard.scoring.score_from_local(outdir))
	    finally:
	        env.monitor.close()

	    if upload:
	        # readline() keeps the trailing newline; strip it so the key is sent
	        # clean, and use a context manager so the file handle is closed.
	        with open('../../../api.txt', 'r') as key_file:
	            api_key = key_file.readline().strip()
	        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=api_key)


	# Run the experiment only when this file is executed as a script,
	# not when it is imported.
	if __name__ == '__main__':
	    main()
	import numpy as np


	class LinearModel:
	    """Linear scoring function whose weights are drawn at random.

	    ``vals`` holds ``n`` weights produced by ``np.random.randn(n)``.
	    """

	    def __init__(self, n):
	        self.vals = np.random.randn(n)

	    def score(self, observation):
	        """Return the weighted sum of ``observation`` using ``self.vals``.

	        Entry ``i`` of ``observation`` is multiplied by ``self.vals[i]`` and
	        the products are summed in index order.
	        """
	        return sum(observation[i] * self.vals[i] for i in range(len(observation)))
	from linear_model import LinearModel


	class RandomGuess:
	    """Agent that acts by thresholding the score of a random linear model.

	    ``create_agent`` installs a fresh ``LinearModel``; ``act`` picks action 0
	    or 1 from the sign of that model's score and tracks the best episode
	    total seen so far in ``best_score`` / ``best_model``.
	    """

	    def __init__(self, action_space, observation_space):
	        self.name = "Random Linear Model"
	        self.alg_id = "alg_EaW7vxLcSWW37vPDLpSK4g"
	        self.action_space = action_space
	        self.observation_space = observation_space

	        # No model exists until create_agent() is called.
	        self.model = None
	        self.best_model = None

	        # Best episode total so far and the running total of the current episode.
	        self.best_score = 0
	        self.episode_reward = 0

	    def create_agent(self):
	        """Replace the current model with a newly drawn ``LinearModel``."""
	        self.model = LinearModel(len(self.observation_space.low))

	    def act(self, observation, reward, done):
	        """Accumulate ``reward`` and return the action for ``observation``.

	        Returns 0 when the model's score is <= 0, otherwise 1.  When ``done``
	        is true the episode total is compared against ``best_score`` (and
	        recorded if higher) before the running total is reset to 0.
	        """
	        if self.model.score(observation) <= 0:
	            action = 0
	        else:
	            action = 1

	        self.episode_reward += reward

	        if done:
	            # Episode finished: keep the model if it beat the previous best.
	            if self.episode_reward > self.best_score:
	                self.best_score = self.episode_reward
	                self.best_model = self.model
	            self.episode_reward = 0

	        return action