Linear Model Hill Climbing
For part 1 of https://openai.com/requests-for-research/#cartpole
Run it from the command line, for example:
python environment.py --env CartPole-v0 --threshold 200 --noise 33
python environment.py --env Hoodle-v0 --threshold 1 --noise 1
(Note: Hoodle-v0 is an environment I created myself; I have opened a pull request for it: https://github.com/openai/gym/pull/227)
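The agent below is plain stochastic hill climbing: a linear model scores each observation, the score is mapped to a discrete action, and after each episode the weights are kept if they produced a new best score, otherwise reverted and re-perturbed. As a rough orientation, here is a minimal self-contained sketch of that loop, assuming the classic gym API (a 4-tuple from env.step) and using the common sign-of-dot-product action rule rather than the modulo mapping used in hill_climbing.py below; the episode count and noise scale are placeholders:

    import gym
    import numpy as np

    env = gym.make('CartPole-v0')
    best_weights = np.random.randn(4)  # one weight per observation dimension
    best_score = -1

    for episode in range(2000):
        # Perturb the best weights; keep the change only if the episode improves.
        weights = best_weights + (np.random.uniform(size=4) - 0.5) * 0.1
        ob = env.reset()
        total, done = 0, False
        while not done:
            action = 0 if np.dot(weights, ob) < 0 else 1
            ob, reward, done, _ = env.step(action)
            total += reward
        if total > best_score:
            best_score, best_weights = total, weights
        if best_score >= 200:
            print('Solved at episode %d' % episode)
            break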
environment.py:

import sys
import argparse

import gym
import gym.scoreboard.scoring

from hill_climbing import HillClimbing


def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--env", required=True)
    parser.add_argument("--threshold", type=int, required=True)
    parser.add_argument("--noise", type=int, required=True)
    # -h/--help is filtered out up front, so parsing never exits early
    args, _ = parser.parse_known_args([arg for arg in sys.argv[1:] if arg not in ('-h', '--help')])

    env_name = args.env                     # e.g. 'CartPole-v0' or 'Hoodle-v0'
    noise = args.noise                      # mutation scale passed to the agent
    best_score_threshold = args.threshold   # score that counts as "solved"

    env = gym.make(env_name)
    agent = HillClimbing(env.action_space, env.observation_space, noise)

    upload = False  # Set to True to upload the results to the OpenAI Gym scoreboard
    outdir = '/tmp/' + agent.name + '-results'
    env.monitor.start(outdir, force=True)

    episode_count = 2000
    best_repeat = 200  # episodes to replay the best policy for before stopping

    for i in xrange(episode_count):
        if agent.best_score < best_score_threshold:
            agent.mutate()
        else:
            if best_repeat == 200:  # first episode at or above the threshold
                print '\n\nGot best score, i=', i
            best_repeat -= 1
            if best_repeat <= 0:
                print "Complete"
                break

        ob = env.reset()
        reward = 0
        done = False
        action = agent.act(ob, reward, done)

        while not done:
            ob, reward, done, _ = env.step(action)
            action = agent.act(ob, reward, done)

    print gym.scoreboard.scoring.score_from_local(outdir)
    env.monitor.close()

    if upload:
        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=open('../../api_key.txt', 'r').readline())


if __name__ == '__main__':
    main()
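One caveat: env.monitor.start, env.monitor.close, gym.scoreboard, and gym.upload belong to the 2016-era gym releases this gist targets. If you are adapting the script to a later 0.x gym release, the monitor moved to a wrapper; a rough equivalent of the monitoring lines (an assumption, not tested against this exact script) would be:

    from gym import wrappers

    env = wrappers.Monitor(gym.make(env_name), outdir, force=True)
    # ... run episodes as above ...
    env.close()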
hill_climbing.py:

from linear_model import LinearModel


class HillClimbing:
    def __init__(self, action_space, observation_space, noise=0.5):
        self.name = "Hill Climbing"
        self.alg_id = "alg_WKinUO3TNabzwPeaD7A"
        self.action_space = action_space
        self.observation_space = observation_space
        self.model = LinearModel(len(self.observation_space.low))
        self.noise = noise  # scale of the uniform perturbation applied on each mutation
        self.best_score = -1
        self.episode_reward = 0

    def mutate(self):
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        # Fold the linear score to a non-negative value, then map it onto a
        # discrete action by taking it modulo the number of actions.
        ob_score = self.model.score(observation)
        ob_score = ob_score if ob_score >= 0 else 2 * abs(ob_score)
        action = int(ob_score % self.action_space.n)

        self.episode_reward += reward

        if done:
            if self.episode_reward > self.best_score:
                self.best_score = self.episode_reward
                self.model.set_best_vals()  # keep the new best weights
            else:
                self.model.revert()  # revert to the best weights found so far
            self.episode_reward = 0

        return action
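The action mapping in act() is a little opaque: a negative linear score is folded to twice its magnitude, and the folded score modulo action_space.n (truncated to an integer) selects the action. A quick trace for a two-action space such as CartPole's, with made-up example scores:

    # Tracing the mapping in act() for n = 2 actions (scores are made up):
    for ob_score in (1.3, -1.3, 2.7, -0.4):
        folded = ob_score if ob_score >= 0 else 2 * abs(ob_score)
        print('%5.1f -> folded %.1f -> action %d' % (ob_score, folded, int(folded % 2)))
    # Output:  1.3 -> folded 1.3 -> action 1
    #         -1.3 -> folded 2.6 -> action 0
    #          2.7 -> folded 2.7 -> action 0
    #         -0.4 -> folded 0.8 -> action 0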
linear_model.py:

import copy

import numpy as np


class LinearModel:
    def __init__(self, n):
        self.n = n
        self.vals = np.random.randn(n)  # weights, initialised from a standard normal
        self.best_vals = None           # best weights found so far

    def score(self, observation):
        return np.dot(observation, self.vals)

    def mutate(self, noise):
        # Add uniform noise in [-noise/2, noise/2) to every weight.
        self.vals += (np.random.uniform(size=self.n) - 0.5) * noise

    def set_best_vals(self):
        self.best_vals = copy.copy(self.vals)

    def revert(self):
        self.vals = copy.copy(self.best_vals)
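And a quick usage sketch of the keep-or-revert cycle the agent drives (assuming the file is saved as linear_model.py, as the import in hill_climbing.py expects):

    import numpy as np
    from linear_model import LinearModel

    model = LinearModel(4)
    model.set_best_vals()            # remember the starting weights
    model.mutate(1.0)                # perturb the weights in place
    print(model.score(np.ones(4)))   # dot product of weights and observation
    model.revert()                   # restore the remembered weights
    assert np.allclose(model.vals, model.best_vals)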