JKCooper2 · June 18, 2016 03:41
diff --git a/README.md b/README.md
diff --git a/environment.py b/environment.py
 import gym
 import gym.scoreboard.scoring

 from random_agent import RandomAgent

 # Gym environment ids to run, listed one per line in execution order.
 ENVS = [
     "PredictActionsCartpole-v0",
     "PredictObsCartpole-v0",
     "OffSwitchCartpole-v0",
     "SemisuperPendulumNoise-v0",
     "SemisuperPendulumRandom-v0",
     "SemisuperPendulumDecay-v0",
 ]


 def main():
     """Run a RandomAgent on every environment in ENVS and upload each run.

     For each environment id this creates the environment, starts the gym
     monitor on ``/tmp/<agent.name>-results``, plays ``n_episodes`` episodes
     with actions chosen by the agent, prints the locally computed score
     after every episode, closes the monitor and finally uploads the
     recorded directory.
     """
     for env_name in ENVS:
         env = gym.make(env_name)

         agent = RandomAgent(env.action_space, env.observation_space)

         # The directory depends only on the agent name, so every environment
         # reuses it. NOTE(review): force=True presumably lets the monitor
         # overwrite the previous run's files -- confirm against the gym docs.
         out_dir = '/tmp/' + agent.name + '-results'
         env.monitor.start(out_dir, force=True, video_callable=False)

         n_episodes = 1000
         try:
             for _ in range(n_episodes):
                 observation = env.reset()
                 reward = 0
                 done = False

                 # First action of the episode is chosen from the reset
                 # observation, before any reward has been received.
                 action = agent.act(observation, reward, done)

                 while not done:
                     observation, reward, done, _ = env.step(action)
                     action = agent.act(observation, reward, done)

                 # Function-call form prints the same text on Python 2 and 3.
                 print(gym.scoreboard.scoring.score_from_local(out_dir))
         finally:
             # Close the monitor even if an episode raised, so the recording
             # is not left open; the exception still propagates and the
             # upload below is skipped in that case.
             env.monitor.close()

         # "YOUR_API_KEY" is a placeholder literal; supply a real key to upload.
         gym.upload(out_dir, algorithm_id=agent.alg_id, api_key="YOUR_API_KEY")


 # Run the experiment only when this file is executed as a script.
 if __name__ == '__main__':
    main()
diff --git a/random_agent.py b/random_agent.py
 import numpy as np


 class RandomAgent(object):
     """Agent that ignores its inputs and returns samples from its action space.

     Attributes:
         action_space: object whose ``sample()`` method produces an action.
         observation_space: stored as given; not read by this class.
         name: short label, fixed to ``'random'``.
         alg_id: algorithm identifier string, fixed at construction.
     """

     def __init__(self, action_space, observation_space):
         """Store the two spaces and set the fixed name and algorithm id."""
         self.action_space = action_space
         self.observation_space = observation_space
         self.name = 'random'
         self.alg_id = "alg_MhPaN5c4TJOFS4tVFh8x3A"

     def act(self, observation, reward, done):
         """Return a new sample from ``action_space`` with NaN entries replaced.

         ``observation``, ``reward`` and ``done`` are accepted but not used.
         """
         return self.__validate_action(self.action_space.sample())

     def __validate_action(self, action):
         """Replace every NaN in ``action`` with a draw from Normal(0, 1).

         Args:
             action: a scalar, a numpy array, or a (possibly nested)
                 tuple/list of those.

         Returns:
             The cleaned action. Numpy arrays are patched in place and
             returned; tuples come back as tuples and any other iterable as
             a list; scalars are returned unchanged unless they are NaN.
         """
         if isinstance(action, np.ndarray):
             # Only floating-point arrays can hold NaN; other dtypes pass through.
             if np.issubdtype(action.dtype, np.floating):
                 nan_mask = np.isnan(action)
                 if nan_mask.any():
                     action[nan_mask] = np.random.normal(
                         0, 1.0, size=int(nan_mask.sum()))
             return action

         if hasattr(action, '__iter__'):
             # Keep the recursive results: the previous version discarded
             # them, so NaNs inside containers were never replaced.
             cleaned = [self.__validate_action(item) for item in action]
             return tuple(cleaned) if isinstance(action, tuple) else cleaned

         if np.isnan(action):
             return np.random.normal(0, 1.0)

         return action
	import gym
	import gym.scoreboard.scoring

	from random_agent import RandomAgent

	# Gym environment ids to run, listed one per line in execution order.
	ENVS = [
	    "PredictActionsCartpole-v0",
	    "PredictObsCartpole-v0",
	    "OffSwitchCartpole-v0",
	    "SemisuperPendulumNoise-v0",
	    "SemisuperPendulumRandom-v0",
	    "SemisuperPendulumDecay-v0",
	]


	def main():
	    """Run a RandomAgent on every environment in ENVS and upload each run.

	    For each environment id this creates the environment, starts the gym
	    monitor on ``/tmp/<agent.name>-results``, plays ``n_episodes`` episodes
	    with actions chosen by the agent, prints the locally computed score
	    after every episode, closes the monitor and finally uploads the
	    recorded directory.
	    """
	    for env_name in ENVS:
	        env = gym.make(env_name)

	        agent = RandomAgent(env.action_space, env.observation_space)

	        # The directory depends only on the agent name, so every environment
	        # reuses it. NOTE(review): force=True presumably lets the monitor
	        # overwrite the previous run's files -- confirm against the gym docs.
	        out_dir = '/tmp/' + agent.name + '-results'
	        env.monitor.start(out_dir, force=True, video_callable=False)

	        n_episodes = 1000
	        try:
	            for _ in range(n_episodes):
	                observation = env.reset()
	                reward = 0
	                done = False

	                # First action of the episode is chosen from the reset
	                # observation, before any reward has been received.
	                action = agent.act(observation, reward, done)

	                while not done:
	                    observation, reward, done, _ = env.step(action)
	                    action = agent.act(observation, reward, done)

	                # Function-call form prints the same text on Python 2 and 3.
	                print(gym.scoreboard.scoring.score_from_local(out_dir))
	        finally:
	            # Close the monitor even if an episode raised, so the recording
	            # is not left open; the exception still propagates and the
	            # upload below is skipped in that case.
	            env.monitor.close()

	        # "YOUR_API_KEY" is a placeholder literal; supply a real key to upload.
	        gym.upload(out_dir, algorithm_id=agent.alg_id, api_key="YOUR_API_KEY")


	# Run the experiment only when this file is executed as a script.
	if __name__ == '__main__':
	    main()
	import numpy as np


	class RandomAgent(object):
	    """Agent that ignores its inputs and returns samples from its action space.

	    Attributes:
	        action_space: object whose ``sample()`` method produces an action.
	        observation_space: stored as given; not read by this class.
	        name: short label, fixed to ``'random'``.
	        alg_id: algorithm identifier string, fixed at construction.
	    """

	    def __init__(self, action_space, observation_space):
	        """Store the two spaces and set the fixed name and algorithm id."""
	        self.action_space = action_space
	        self.observation_space = observation_space
	        self.name = 'random'
	        self.alg_id = "alg_MhPaN5c4TJOFS4tVFh8x3A"

	    def act(self, observation, reward, done):
	        """Return a new sample from ``action_space`` with NaN entries replaced.

	        ``observation``, ``reward`` and ``done`` are accepted but not used.
	        """
	        return self.__validate_action(self.action_space.sample())

	    def __validate_action(self, action):
	        """Replace every NaN in ``action`` with a draw from Normal(0, 1).

	        Args:
	            action: a scalar, a numpy array, or a (possibly nested)
	                tuple/list of those.

	        Returns:
	            The cleaned action. Numpy arrays are patched in place and
	            returned; tuples come back as tuples and any other iterable as
	            a list; scalars are returned unchanged unless they are NaN.
	        """
	        if isinstance(action, np.ndarray):
	            # Only floating-point arrays can hold NaN; other dtypes pass through.
	            if np.issubdtype(action.dtype, np.floating):
	                nan_mask = np.isnan(action)
	                if nan_mask.any():
	                    action[nan_mask] = np.random.normal(
	                        0, 1.0, size=int(nan_mask.sum()))
	            return action

	        if hasattr(action, '__iter__'):
	            # Keep the recursive results: the previous version discarded
	            # them, so NaNs inside containers were never replaced.
	            cleaned = [self.__validate_action(item) for item in action]
	            return tuple(cleaned) if isinstance(action, tuple) else cleaned

	        if np.isnan(action):
	            return np.random.normal(0, 1.0)

	        return action