Skip to content

Instantly share code, notes, and snippets.

@JKCooper2
Created June 18, 2016 03:41
Show Gist options
  • Save JKCooper2/a387b42d2c26732a043ce9029a6555c9 to your computer and use it in GitHub Desktop.
Save JKCooper2/a387b42d2c26732a043ce9029a6555c9 to your computer and use it in GitHub Desktop.

Random agent with action_space value checking

import gym
import gym.scoreboard.scoring
from random_agent import RandomAgent
# Ids of the gym environments the agent is evaluated on, in run order.
ENVS = [
    "PredictActionsCartpole-v0",
    "PredictObsCartpole-v0",
    "OffSwitchCartpole-v0",
    "SemisuperPendulumNoise-v0",
    "SemisuperPendulumRandom-v0",
    "SemisuperPendulumDecay-v0",
]
def main(env_names=None, n_episodes=1000, api_key="YOUR_API_KEY"):
    """Run a RandomAgent on each environment, print its score and upload it.

    For every environment id the monitor records ``n_episodes`` episodes
    into ``/tmp/<agent.name>-results``; the local score is printed, the
    monitor is closed and the results directory is uploaded.

    Args:
        env_names: Iterable of gym environment ids. Defaults to ``ENVS``.
        n_episodes: Number of episodes to run per environment.
        api_key: API key passed to ``gym.upload``.
    """
    if env_names is None:
        env_names = ENVS

    for env_name in env_names:
        env = gym.make(env_name)
        agent = RandomAgent(env.action_space, env.observation_space)
        out_dir = '/tmp/' + agent.name + '-results'

        env.monitor.start(out_dir, force=True, video_callable=False)
        try:
            for _ in range(n_episodes):
                observation = env.reset()
                reward = 0
                done = False
                action = agent.act(observation, reward, done)
                while not done:
                    observation, reward, done, _info = env.step(action)
                    action = agent.act(observation, reward, done)

            # Call form of print: identical output for a single argument on
            # Python 2, and valid syntax on Python 3.
            print(gym.scoreboard.scoring.score_from_local(out_dir))
        finally:
            # Close the monitor even if an episode raised.
            env.monitor.close()

        gym.upload(out_dir, algorithm_id=agent.alg_id, api_key=api_key)
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    main()
import numpy as np
class RandomAgent(object):
    """Agent that ignores its inputs and returns random, NaN-free actions.

    Actions are drawn from ``action_space.sample()``. Any NaN found in a
    sample is replaced by a value drawn from a normal distribution N(0, 1).
    """

    def __init__(self, action_space, observation_space):
        """Store the spaces and the identifiers exposed by the agent.

        Args:
            action_space: Object exposing ``sample()``; source of actions.
            observation_space: Stored on the instance; not used by ``act``.
        """
        self.action_space = action_space
        self.observation_space = observation_space
        self.name = 'random'
        self.alg_id = "alg_MhPaN5c4TJOFS4tVFh8x3A"

    def act(self, observation, reward, done):
        """Return a validated random action; all arguments are ignored."""
        return self.__validate_action(self.action_space.sample())

    def __validate_action(self, action):
        """Return ``action`` with every NaN replaced by a draw from N(0, 1).

        Mutable containers (lists, numpy arrays) are repaired in place and
        returned. Tuples are immutable, so a repaired copy is returned
        instead. Scalars are returned unchanged unless they are NaN.
        """
        if isinstance(action, tuple):
            return tuple(self.__validate_action(item) for item in action)

        if hasattr(action, '__iter__'):
            # The repaired element has to be written back into the
            # container: rebinding the local name inside the recursive call
            # leaves the container untouched.
            for index, item in enumerate(action):
                action[index] = self.__validate_action(item)
            return action

        if np.isnan(action):
            return np.random.normal(0, 1.0)
        return action
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment