Last active
October 16, 2016 05:49
-
-
Save hugoalvarado/c9f0f27bf619f4bafac840fac5c52d1c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym | |
import numpy as np | |
import time | |
def run_test_env(env_name, perform_action=True, exit_on_done=True):
    """Run a gym environment for up to 20 episodes of 100 steps each, rendering every step.

    Args:
        env_name: gym environment id, e.g. 'CartPole-v0'.
        perform_action: when False, only render — no action is sampled and
            step() is never called, so the observation never changes.
        exit_on_done: when True, end the episode as soon as the environment
            reports done; when False, keep stepping past done (gym documents
            further step() calls after done as returning undefined results).
    """
    env = gym.make(env_name)
    for i_episode in range(20):
        observation = env.reset()
        for t in range(100):
            env.render()
            if not perform_action:
                # view-only mode: skip sampling and stepping entirely
                continue
            action = env.action_space.sample()
            print(action)
            # step() returns:
            #   observation (object): agent's observation of the current environment
            #   reward (float): amount of reward returned after previous action
            #   done (boolean): whether the episode has ended, in which case
            #       further step() calls will return undefined results
            #   info (dict): auxiliary diagnostic information
            observation, reward, done, info = env.step(action)
            print(observation, reward, done, info)
            if done:
                print('Episode {} done in {} steps'.format(i_episode, t))
                if exit_on_done:
                    break
# use hill climbing algorithm, initialize weights randomly | |
# use memory to save good weights | |
def run_episode(env, parameters, max_steps=200):
    """Run one episode, choosing actions from a linear threshold policy.

    The action is 0 (move left) when the dot product of *parameters* and
    the current observation is negative, otherwise 1 (move right).

    Args:
        env: gym-style environment exposing reset()/render()/step().
        parameters: weight vector with the same length as the observation.
        max_steps: step cap for the episode (default 200, CartPole-v0's
            per-episode limit).

    Returns:
        Total reward accumulated until done or the step cap is reached.
    """
    observation = env.reset()
    total_reward = 0
    for _ in range(max_steps):
        env.render()
        # linear policy: move left (0) or right (1)
        action = 0 if np.matmul(parameters, observation) < 0 else 1
        observation, reward, done, info = env.step(action)
        # time.sleep(0.2)  # uncomment to slow playback for easier viewing
        total_reward += reward
        if done:
            print('Done')
            break
    return total_reward
def random_parameters(param_count):
    """Return *param_count* weights drawn uniformly from [-1, 1)."""
    # np.random.rand yields values in [0, 1); scale and shift to [-1, 1).
    uniform_unit = np.random.rand(param_count)
    return 2.0 * uniform_unit - 1.0
#hill climbing | |
def train(submit):
    """Hill-climb a linear policy for CartPole-v0.

    Starts from random weights and repeatedly evaluates a noise-perturbed
    copy of the current best weights, adopting the perturbed weights
    whenever they achieve a new best episode reward. Stops early once a
    full-score (200-reward) episode is found.

    Args:
        submit: unused; kept for interface compatibility with callers.

    Returns:
        The best weight vector found.
    """
    env = gym.make('CartPole-v0')
    noise_scaling = 0.1
    param_count = 4  # CartPole-v0 observation vector size
    parameters = random_parameters(param_count)
    best_reward = 0
    for i in range(2000):
        # perturb the current best weights with small uniform noise
        new_parameters = parameters + random_parameters(param_count) * noise_scaling
        reward = run_episode(env, new_parameters)
        print("Reward %d best %d : %f %f %f %f" % (reward, best_reward, *parameters))
        if reward > best_reward:
            print("New reward %d" % reward)
            best_reward = reward
            parameters = new_parameters
            if reward == 200:
                # solved: episode ran the full 200 steps
                print(i)
                break
    return parameters
#r = train(submit = False) | |
#view different start positions | |
#run_test_env('CartPole-v0', perform_action=False, exit_on_done=False) | |
#view behavior of cart with random actions, do not exit environment | |
#even if pole is past the recovery point | |
#run_test_env('CartPole-v0', perform_action=True, exit_on_done=False) | |
#run_test_env('CartPole-v0') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment