Random agent with action_space value checking
Created
June 18, 2016 03:41
-
-
Save JKCooper2/a387b42d2c26732a043ce9029a6555c9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym | |
import gym.scoreboard.scoring | |
from random_agent import RandomAgent | |
# Names of the Gym environments the random agent is run on, in this order.
ENVS = [
    "PredictActionsCartpole-v0",
    "PredictObsCartpole-v0",
    "OffSwitchCartpole-v0",
    "SemisuperPendulumNoise-v0",
    "SemisuperPendulumRandom-v0",
    "SemisuperPendulumDecay-v0",
]
def main(n_episodes=1000, api_key="YOUR_API_KEY"):
    """Run the random agent on every environment in ENVS and upload each run.

    For each environment name, a new environment and a new RandomAgent are
    created, the environment's monitor records the episodes into
    ``/tmp/<agent.name>-results``, the locally computed score is printed,
    and the recording is passed to ``gym.upload``.

    Args:
        n_episodes: Number of episodes to run per environment.
        api_key: Value forwarded to ``gym.upload`` as ``api_key``. The
            default is a placeholder string, presumably to be replaced with
            a real key before uploading.
    """
    for env_name in ENVS:
        env = gym.make(env_name)
        agent = RandomAgent(env.action_space, env.observation_space)

        # The directory depends only on the agent name, so every environment
        # records into the same path; each run is uploaded before the next
        # environment starts recording there.
        out_dir = '/tmp/' + agent.name + '-results'
        env.monitor.start(out_dir, force=True, video_callable=False)
        try:
            for _ in range(n_episodes):
                observation = env.reset()
                reward = 0
                done = False

                # The agent is asked for an action before the first step and
                # again after every step, including the terminal one.
                action = agent.act(observation, reward, done)
                while not done:
                    observation, reward, done, _info = env.step(action)
                    action = agent.act(observation, reward, done)

            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(gym.scoreboard.scoring.score_from_local(out_dir))
        finally:
            # Close the monitor even if an episode raised.
            env.monitor.close()

        gym.upload(out_dir, algorithm_id=agent.alg_id, api_key=api_key)
# Run the experiment only when this file is executed as a script.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class RandomAgent(object):
    """Agent that ignores its inputs and returns a sampled action.

    Each action is drawn from ``action_space.sample()`` and then passed
    through a NaN check so that no NaN value is handed back to the caller.
    """

    def __init__(self, action_space, observation_space):
        """Store the spaces and the identifiers used when reporting results.

        Args:
            action_space: Object providing a ``sample()`` method.
            observation_space: Stored on the instance; not used by ``act``.
        """
        self.action_space = action_space
        self.observation_space = observation_space
        self.name = 'random'
        self.alg_id = "alg_MhPaN5c4TJOFS4tVFh8x3A"

    def act(self, observation, reward, done):
        """Return a sampled action with any NaN entries replaced.

        ``observation``, ``reward`` and ``done`` are accepted but not used.
        """
        return self.__validate_action(self.action_space.sample())

    def __validate_action(self, action):
        """Replace NaNs in ``action`` with draws from a normal(0, 1) distribution.

        Tuples are rebuilt because they are immutable. Lists and numpy
        arrays with at least one dimension are cleaned element by element
        in place and returned. Anything else is treated as a scalar.

        Args:
            action: A scalar, or a (possibly nested) tuple, list or numpy
                array of scalars.

        Returns:
            The action with every NaN scalar replaced by a random value.
        """
        if isinstance(action, tuple):
            return tuple(self.__validate_action(item) for item in action)

        is_array = isinstance(action, np.ndarray) and action.ndim > 0
        if is_array or isinstance(action, list):
            # Write the cleaned element back; discarding the recursive
            # result would leave the NaN inside the container.
            for index, item in enumerate(action):
                action[index] = self.__validate_action(item)
            return action

        if np.isnan(action):
            return np.random.normal(0, 1.0)
        return action
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment