Last active
June 15, 2016 12:35
-
-
Save JKCooper2/73153c409f21d42e2976cc640ec58f4f to your computer and use it in GitHub Desktop.
Linear Model Hill Climbing for CartPole
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
For part 1 of https://openai.com/requests-for-research/#cartpole
Note: this quite often fails to solve the environment, because hill climbing can get stuck in a local optimum.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym | |
import gym.scoreboard.scoring | |
from hill_climbing import HillClimbing | |
def main():
    """Train a hill-climbing agent on CartPole-v0 and optionally upload the run.

    Runs up to ``episode_count`` episodes. While the agent's best score is
    below 200 the weights are mutated before every episode; once the best
    score reaches 200 the agent is replayed without mutation for
    ``best_repeat`` further iterations and the loop then stops early.
    The monitor results are scored locally, printed, and (when ``upload`` is
    true) uploaded with the API key read from ``../../../../api.txt``.
    """
    # NOTE(review): the indentation of this script was lost in the source this
    # was recovered from; the score print, monitor close and upload are assumed
    # to run once, after the training loop -- confirm against the original.
    env = gym.make('CartPole-v0')
    agent = HillClimbing(env.action_space, env.observation_space)
    upload = True  # Sets whether to upload to OpenAI

    outdir = '/tmp/' + agent.name + '-results'
    env.monitor.start(outdir, force=True)

    episode_count = 2000
    best_repeat = 200  # iterations left to replay the best weights once score hits 200

    for _ in range(episode_count):
        if agent.best_score < 200:
            agent.mutate()
        else:
            best_repeat -= 1
            if best_repeat <= 0:
                print("Complete")
                break

        ob = env.reset()
        reward = 0
        done = False

        # The first call only picks an action; there is no reward to report yet.
        action = agent.act(ob, reward, done)
        while not done:
            ob, reward, done, _info = env.step(action)
            # The call made with done=True also lets the agent settle the episode.
            action = agent.act(ob, reward, done)

    print(gym.scoreboard.scoring.score_from_local(outdir))
    env.monitor.close()

    if upload:
        # Close the key file deterministically and drop the trailing newline
        # that readline() keeps, so the key is passed on clean.
        with open('../../../../api.txt', 'r') as key_file:
            api_key = key_file.readline().strip()
        gym.upload(outdir, algorithm_id=agent.alg_id, api_key=api_key)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from linear_model import LinearModel | |
class HillClimbing:
    """Hill-climbing agent that picks one of two actions from a linear score.

    The agent keeps a ``LinearModel`` sized to the observation space. After
    each finished episode it either records the current weights as the best
    seen (when the episode total beat ``best_score``) or asks the model to
    revert to the previously recorded best weights.
    """

    def __init__(self, action_space, observation_space, noise=0.5):
        """Store the spaces, build the model and reset the score tracking.

        Args:
            action_space: Kept on the instance; not otherwise used here.
            observation_space: Its ``low`` attribute's length sets the number
                of model weights.
            noise: Passed to the model on every ``mutate`` call
                (1 standard deviation of motion).
        """
        self.name = "Hill Climbing"
        self.alg_id = "alg_WKinUO3TNabzwPeaD7A"

        self.action_space = action_space
        self.observation_space = observation_space

        n_weights = len(observation_space.low)
        self.model = LinearModel(n_weights)

        self.noise = noise  # 1 standard deviation of motion
        self.best_score = 0
        self.episode_reward = 0

    def mutate(self):
        """Perturb the model weights using this agent's noise level."""
        self.model.mutate(self.noise)

    def act(self, observation, reward, done):
        """Choose an action and account for the reward just received.

        Args:
            observation: Current observation, scored by the model.
            reward: Reward to add to the running episode total.
            done: True when the episode has finished.

        Returns:
            0 when the model's score is at or below zero, otherwise 1.
        """
        action = 0 if self.model.score(observation) <= 0 else 1
        self.episode_reward += reward

        if not done:
            return action

        # Episode finished: keep the weights only on a strict improvement.
        beat_best = self.episode_reward > self.best_score
        if beat_best:
            self.best_score = self.episode_reward
            self.model.set_best_vals()  # Set the best vals found
        else:
            self.model.revert()  # Reverts the model to the best vals found so far

        self.episode_reward = 0
        return action
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import copy | |
class LinearModel:
    """Linear scoring model with a snapshot of its best weights.

    ``vals`` holds the current weights; ``best_vals`` holds a copy taken by
    ``set_best_vals`` and stays ``None`` until the first snapshot.
    """

    def __init__(self, n):
        """Create ``n`` weights drawn from a standard normal distribution."""
        self.vals = np.random.randn(n)
        self.best_vals = None

    def score(self, observation):
        """Return the dot product of ``observation`` with the weights.

        Raises:
            ValueError: If ``observation`` and the weights differ in length.
        """
        return np.dot(observation, self.vals)

    def mutate(self, noise):
        """Add zero-mean Gaussian noise, scaled by ``noise``, to every weight."""
        # One vectorised draw instead of an index loop; the weights are
        # still updated in place.
        self.vals += np.random.normal(size=self.vals.shape) * noise

    def set_best_vals(self):
        """Snapshot the current weights as the best found so far."""
        self.best_vals = copy.copy(self.vals)

    def revert(self):
        """Restore the weights from the last snapshot, if there is one.

        When no snapshot has been taken yet the current weights are kept;
        copying the ``None`` placeholder would wipe the model and make the
        next ``score`` or ``mutate`` call fail.
        """
        if self.best_vals is None:
            return
        self.vals = copy.copy(self.best_vals)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment