Linear Model Hill Climbing
For part 1 of https://openai.com/requests-for-research/#cartpole
Run it from the command line, for example:
python environment.py --env CartPole-v0 --threshold 200 --noise 33
python environment.py --env Hoodle-v0 --threshold 1 --noise 1
(Note: Hoodle-v0 is an environment I created myself; I have opened a pull request for it: https://github.com/openai/gym/pull/227)
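The agent below is plain stochastic hill climbing: a linear model scores each observation, the score is mapped to a discrete action, and after each episode the weights are kept if they produced a new best score, otherwise reverted and re-perturbed. As a rough orientation, here is a minimal self-contained sketch of that loop, assuming the classic gym API (a 4-tuple from env.step) and using the common sign-of-dot-product action rule rather than the modulo mapping used in hill_climbing.py below; the episode count and noise scale are placeholders:

    import gym
    import numpy as np

    env = gym.make('CartPole-v0')
    best_weights = np.random.randn(4)  # one weight per observation dimension
    best_score = -1

    for episode in range(2000):
        # Perturb the best weights; keep the change only if the episode improves.
        weights = best_weights + (np.random.uniform(size=4) - 0.5) * 0.1
        ob = env.reset()
        total, done = 0, False
        while not done:
            action = 0 if np.dot(weights, ob) < 0 else 1
            ob, reward, done, _ = env.step(action)
            total += reward
        if total > best_score:
            best_score, best_weights = total, weights
        if best_score >= 200:
            print('Solved at episode %d' % episode)
            break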
environment.py:

import sys
import argparse

import gym
import gym.scoreboard.scoring

from hill_climbing import HillClimbing


def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--env", required=True)
    parser.add_argument("--threshold", type=int, required=True)
    parser.add_argument("--noise", type=int, required=True)
    # -h/--help is filtered out up front, so parsing never exits early
    args, _ = parser.parse_known_args([arg for arg in sys.argv[1:] if arg not in ('-h', '--help')])

    env_name = args.env                     # e.g. 'CartPole-v0' or 'Hoodle-v0'
    noise = args.noise                      # mutation scale passed to the agent
    best_score_threshold = args.threshold   # score that counts as "solved"

    env = gym.make(env_name)
    agent = HillClimbing(env.action_space, env.observation_space, noise)

    upload = False  # Set to True to upload the results to the OpenAI Gym scoreboard
    outdir = '/tmp/' + agent.name + '-results'
    env.monitor.start(outdir, force=True)

    episode_count = 2000
    best_repeat = 200  # episodes to replay the best policy for before stopping

    for i in xrange(episode_count):
        if agent.best_score < best_score_threshold:
            agent.mutate()
        else:
            if best_repeat == 200:  # first episode at or above the threshold
                print '\n\nGot best score, i=', i
            best_repeat -= 1
            if best_repeat <= 0:
                print "Complete"
                break

        ob = env.reset()
        reward = 0
        done = False
        action = agent.act(ob, reward, done)

        while not done:
            ob, reward, done, _ = env.step(action)
            action = agent.act(ob, reward, done)

    print gym.scoreboard.scoring.score_from_local(outdir)
    env.monitor.close()

    if upload:
        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=open('../../api_key.txt', 'r').readline())


if __name__ == '__main__':
    main()
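One caveat: env.monitor.start, env.monitor.close, gym.scoreboard, and gym.upload belong to the 2016-era gym releases this gist targets. If you are adapting the script to a later 0.x gym release, the monitor moved to a wrapper; a rough equivalent of the monitoring lines (an assumption, not tested against this exact script) would be:

    from gym import wrappers

    env = wrappers.Monitor(gym.make(env_name), outdir, force=True)
    # ... run episodes as above ...
    env.close()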
hill_climbing.py:

from linear_model import LinearModel


class HillClimbing:
    def __init__(self, action_space, observation_space, noise=0.5):
        self.name = "Hill Climbing"
        self.alg_id = "alg_WKinUO3TNabzwPeaD7A"
        self.action_space = action_space
        self.observation_space = observation_space
        self.model = LinearModel(len(self.observation_space.low))
        self.noise = noise  # scale of the uniform perturbation applied on each mutation
        self.best_score = -1
        self.episode_reward = 0

    def mutate(self):
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        # Fold the linear score to a non-negative value, then map it onto a
        # discrete action by taking it modulo the number of actions.
        ob_score = self.model.score(observation)
        ob_score = ob_score if ob_score >= 0 else 2 * abs(ob_score)
        action = int(ob_score % self.action_space.n)

        self.episode_reward += reward

        if done:
            if self.episode_reward > self.best_score:
                self.best_score = self.episode_reward
                self.model.set_best_vals()  # keep the new best weights
            else:
                self.model.revert()  # revert to the best weights found so far
            self.episode_reward = 0

        return action
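The action mapping in act() is a little opaque: a negative linear score is folded to twice its magnitude, and the folded score modulo action_space.n (truncated to an integer) selects the action. A quick trace for a two-action space such as CartPole's, with made-up example scores:

    # Tracing the mapping in act() for n = 2 actions (scores are made up):
    for ob_score in (1.3, -1.3, 2.7, -0.4):
        folded = ob_score if ob_score >= 0 else 2 * abs(ob_score)
        print('%5.1f -> folded %.1f -> action %d' % (ob_score, folded, int(folded % 2)))
    # Output:  1.3 -> folded 1.3 -> action 1
    #         -1.3 -> folded 2.6 -> action 0
    #          2.7 -> folded 2.7 -> action 0
    #         -0.4 -> folded 0.8 -> action 0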
linear_model.py:

import copy

import numpy as np


class LinearModel:
    def __init__(self, n):
        self.n = n
        self.vals = np.random.randn(n)  # weights, initialised from a standard normal
        self.best_vals = None           # best weights found so far

    def score(self, observation):
        return np.dot(observation, self.vals)

    def mutate(self, noise):
        # Add uniform noise in [-noise/2, noise/2) to every weight.
        self.vals += (np.random.uniform(size=self.n) - 0.5) * noise

    def set_best_vals(self):
        self.best_vals = copy.copy(self.vals)

    def revert(self):
        self.vals = copy.copy(self.best_vals)
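And a quick usage sketch of the keep-or-revert cycle the agent drives (assuming the file is saved as linear_model.py, as the import in hill_climbing.py expects):

    import numpy as np
    from linear_model import LinearModel

    model = LinearModel(4)
    model.set_best_vals()            # remember the starting weights
    model.mutate(1.0)                # perturb the weights in place
    print(model.score(np.ones(4)))   # dot product of weights and observation
    model.revert()                   # restore the remembered weights
    assert np.allclose(model.vals, model.best_vals)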