-
-
Save baoblackcoal/a26069513780b63acca3b1b619bc503e to your computer and use it in GitHub Desktop.
Linear Model Hill Climbing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| For part 1 of https://openai.com/requests-for-research/#cartpole | |
| Run from the command line, for example: | |
| python environment.py --env CartPole-v0 --threshold 200 --noise 33 | |
| python environment.py --env Hoodle-v0 --threshold 1 --noise 1 | |
| (Note: Hoodle-v0 is an environment I created myself; I have submitted a pull request for it: https://github.com/openai/gym/pull/227 ) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| import argparse | |
| import gym | |
| import gym.scoreboard.scoring | |
| from hill_climbing import HillClimbing | |
def main():
    """Train a hill-climbing agent on a Gym environment and print its score.

    Command-line arguments (all required):
        --env        Environment id passed to ``gym.make``,
                     e.g. ``CartPole-v0`` or ``Hoodle-v0``.
        --threshold  Best episode score at which the agent stops mutating,
                     e.g. ``200`` for CartPole-v0, ``1`` for Hoodle-v0.
        --noise      Mutation width handed to ``HillClimbing``,
                     e.g. ``33`` for CartPole-v0, ``1`` for Hoodle-v0.

    While the agent's best score is below the threshold the model is mutated
    before every episode. Once the threshold is reached, mutation stops and
    the run ends after the remaining confirmation episodes (or when the
    overall episode budget is used up). Monitor output is written to
    ``/tmp/<agent name>-results`` and its local score is printed.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--env", required=True)
    parser.add_argument("--threshold", type=int, required=True)
    # float still accepts the integer values used previously, and also allows
    # fractional widths such as HillClimbing's own default of 0.5.
    parser.add_argument("--noise", type=float, required=True)
    # Help flags are filtered out before parsing, so -h/--help never reaches argparse.
    cli_args = [arg for arg in sys.argv[1:] if arg not in ('-h', '--help')]
    args, _ = parser.parse_known_args(cli_args)

    env_name = args.env
    noise = args.noise
    best_score_threshold = args.threshold

    env = gym.make(env_name)
    agent = HillClimbing(env.action_space, env.observation_space, noise)
    upload = False  # Sets whether to upload to OpenAI
    outdir = '/tmp/' + agent.name + '-results'
    env.monitor.start(outdir, force=True)

    episode_count = 2000
    confirm_episodes = 200  # episodes to keep playing after the threshold is reached
    best_repeat = confirm_episodes
    for i in range(episode_count):
        if agent.best_score < best_score_threshold:
            agent.mutate()
        else:
            # Announce only on the first confirmation episode. Comparing against
            # the initial repeat count (rather than the score threshold) makes
            # this independent of the --threshold value.
            if best_repeat == confirm_episodes:
                print('\n\nGot best score, i= %s' % i)
            best_repeat -= 1
            if best_repeat <= 0:
                print("Complete")
                break

        # Play one full episode; the agent is told when the episode ends via `done`.
        ob = env.reset()
        reward = 0
        done = False
        action = agent.act(ob, reward, done)
        while not done:
            ob, reward, done, _ = env.step(action)
            action = agent.act(ob, reward, done)

    print(gym.scoreboard.scoring.score_from_local(outdir))
    env.monitor.close()

    if upload:
        # Close the key file promptly and drop the trailing newline readline() keeps.
        with open('../../api_key.txt', 'r') as key_file:
            api_key = key_file.readline().strip()
        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=api_key)


if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from linear_model import LinearModel | |
| import numpy as np | |
class HillClimbing:
    """Hill-climbing agent backed by a linear model.

    The agent accumulates reward over an episode. When the episode ends it
    either records the current model weights as the best found so far or
    reverts the model to the previously recorded best.
    """

    def __init__(self, action_space, observation_space, noise=0.5):
        """Set up the agent.

        Args:
            action_space: Object exposing ``n``, the number of discrete actions.
            observation_space: Object exposing ``low``; its length sets the
                number of model weights.
            noise: Value forwarded to ``LinearModel.mutate`` on every
                ``mutate()`` call, where it scales a uniform random offset.
        """
        self.name = "Hill Climbing"
        self.alg_id = "alg_WKinUO3TNabzwPeaD7A"
        self.action_space = action_space
        self.observation_space = observation_space
        n_weights = len(self.observation_space.low)
        self.model = LinearModel(n_weights)
        self.noise = noise
        self.best_score = -1
        self.episode_reward = 0

    def mutate(self):
        """Randomly perturb the model weights using the configured noise."""
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        """Choose an action and, at episode end, keep or discard the weights.

        Args:
            observation: Current observation, scored by the linear model.
            reward: Reward to add to the running episode total.
            done: True when the episode has ended.

        Returns:
            An integer action index in ``range(action_space.n)``.
        """
        raw_score = self.model.score(observation)
        # Negative scores are replaced by twice their magnitude before the modulo.
        if raw_score >= 0:
            folded = raw_score
        else:
            folded = 2 * abs(raw_score)
        action = int(folded % self.action_space.n)

        self.episode_reward += reward
        if not done:
            return action

        # Episode finished: keep the weights only if they beat the best score.
        if self.episode_reward > self.best_score:
            self.best_score = self.episode_reward
            self.model.set_best_vals()
        else:
            self.model.revert()
        self.episode_reward = 0
        return action
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import copy | |
class LinearModel:
    """Linear scoring model whose weights can be perturbed and rolled back."""

    def __init__(self, n):
        """Create a model with ``n`` weights drawn from a standard normal.

        Args:
            n: Number of weights.
        """
        self.n = n
        self.vals = np.random.randn(n)
        # No snapshot exists until set_best_vals() has been called.
        self.best_vals = None

    def score(self, observation):
        """Return the dot product of ``observation`` and the current weights."""
        return np.dot(observation, self.vals)

    def mutate(self, noise):
        """Shift every weight in place by an independent random offset.

        Each offset is a uniform sample from ``[0, 1)``, centred by
        subtracting 0.5 and then multiplied by ``noise``.
        """
        self.vals += (np.random.uniform(size=self.n) - 0.5) * noise

    def set_best_vals(self):
        """Snapshot the current weights as the best found so far."""
        self.best_vals = copy.copy(self.vals)

    def revert(self):
        """Restore the weights from the last snapshot, if one exists.

        Before any snapshot has been taken this is a no-op. Copying the
        ``None`` placeholder into ``vals`` would break later ``score`` and
        ``mutate`` calls.
        """
        if self.best_vals is None:
            return
        # Copy so that the in-place mutate() never alters the snapshot.
        self.vals = copy.copy(self.best_vals)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment