# CARTPOLE MULTI AGENT
# Set up to allow for using a pool of agents
import logging

import gym
from CrossEntropyMethod import CrossEntropyMethodPool

import gym.scoreboard.scoring
import gym.monitoring.monitor
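# Note (editorial, not from the gist): this script targets the 2016-era gym
# API; env.monitor, gym.scoreboard and gym.upload were removed from later
# gym releases, so it needs a gym version from that period.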

def main():
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    outdir = '/tmp/cem-results/'
    agents = CrossEntropyMethodPool('CartPole-v0', outdir, n_samples=5, top_n=.4)

    episodes = 30

    for _ in range(episodes):
        agents.train()
        agents.update()

    print(gym.scoreboard.scoring.score_from_local(outdir))

    gym.monitoring.monitor.close_all_monitors()

    # Upload to the scoreboard
    upload = True  # Sets whether to upload to OpenAI
    if upload:
        logger.info("Complete. Uploading results")
        gym.upload(outdir, algorithm_id="pool-cem", api_key="api_key")


if __name__ == '__main__':
    main()

# CrossEntropyMethod.py
import numpy as np

import gym
import gym.monitoring.monitor

np.random.seed(0)


class CrossEntropyMethodAgent(object):
    def __init__(self):
        self.name = "cem"
        self.mean = None

    def choose_action(self, observation):
        # Linear policy: weighted sum of the observation, thresholded at 0
        action_score = float(np.dot(observation, self.mean))
        return int(action_score > 0)  # Return 1 if score > 0, else 0

    def act(self, observation, reward, done):
        # If first time running, set up distribution values for each observed value
        if self.mean is None:
            self.mean = np.random.randn(len(observation))

        return self.choose_action(observation)

    def update(self, mean, variance):
        # np.random.normal's scale parameter is a standard deviation,
        # so sample the new weights with the square root of the variance
        self.mean = np.random.normal(mean, np.sqrt(variance))

def capped_cubic_video_schedule_single(episode_id, monitor_id):
    # Only the first monitor in the pool records video
    if monitor_id != 0:
        return False

    if episode_id < 1000:
        return int(round(episode_id ** (1. / 3))) ** 3 == episode_id
    else:
        return episode_id % 1000 == 0
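
# Illustrative note (not from the gist): for monitor 0 this schedule records
# episodes at perfect cubes (0, 1, 8, 27, 64, ...) up to episode 1000, then
# every 1000th episode after that; every other monitor never records.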

class CrossEntropyMethodPool(object):
    def __init__(self, env, path, n_samples=10, top_n=0.2):
        self.name = "cem"
        self.env = env
        self.n_samples = n_samples
        self.path = path

        self.agent_pool = [CrossEntropyMethodAgent() for _ in range(n_samples)]
        self.env_pool = [gym.make(self.env) for _ in range(self.n_samples)]

        # Note: n_monitors and the (episode_id, monitor_id) video_callable
        # signature assume a gym build with multi-monitor support; stock gym
        # monitors take a single-argument video_callable
        for env in self.env_pool:
            env.monitor.start(self.path, force=True, n_monitors=self.n_samples, video_callable=capped_cubic_video_schedule_single)

        self.top_n = int(top_n * n_samples)

        self.mean = None
        self.variance = None
        self.obs_length = None
        self.rewards = None

    def run_episode(self, env, agent):
        reward = 0
        ep_reward = 0
        done = False

        ob = env.reset()

        while not done:
            action = agent.act(ob, reward, done)
            ob, reward, done, _ = env.step(action)
            ep_reward += reward

        return ep_reward

    # Trains all agents with their current settings
    def train(self):
        if self.obs_length is None:
            self.obs_length = self.env_pool[0].observation_space.shape[0]

        self.rewards = [self.run_episode(self.env_pool[i], self.agent_pool[i]) for i in range(self.n_samples)]

    # Selects the top_n agents and creates the next generation
    def update(self):
        # Get the ids of the top_n models
        top_id = np.array(self.rewards).argsort()[-self.top_n:][::-1]

        np_top = np.array([self.agent_pool[i].mean for i in top_id])

        self.mean = np.mean(np_top, axis=0)
        # Floor the variance so exploration never collapses entirely
        self.variance = np.maximum(np.var(np_top, axis=0), 0.0001)

        for agent in self.agent_pool:
            agent.update(self.mean, self.variance)
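
# Minimal sketch (not part of the original gist) of the cross-entropy update
# the pool performs each generation, on a toy 1-D objective: sample
# parameters from a Gaussian, keep the top fraction by score, then refit the
# Gaussian to those elites. toy_score, n_samples and elite_frac below are
# illustrative assumptions, not names from the gist.
if __name__ == '__main__':
    def toy_score(theta):
        return -(theta - 3.0) ** 2  # maximised at theta = 3

    mean, variance = 0.0, 1.0
    n_samples, elite_frac = 20, 0.2
    for generation in range(30):
        thetas = np.random.normal(mean, np.sqrt(variance), n_samples)
        scores = np.array([toy_score(t) for t in thetas])
        elite = thetas[scores.argsort()[-int(elite_frac * n_samples):]]
        mean = float(np.mean(elite))
        variance = max(float(np.var(elite)), 0.0001)  # same variance floor as above
    print("CEM mean after 30 generations: %.3f" % mean)  # converges near 3.0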