import numpy as np

from tensorforce.agents import PPOAgent
from tensorforce.execution import Runner
from tensorforce.contrib.openai_gym import OpenAIGym

# Create an OpenAI Gym environment, e.g.:
# ReversedAddition-v0
# CartPole-v0
env = OpenAIGym('ReversedAddition-v0', visualize=False)
print(env.gym.observation_space)
print(env.gym.action_space)
# Network as list of layers
network_spec = [
    dict(type='embedding', size=32, indices=100),
    dict(type='dense', size=32),
    dict(type='dense', size=32)
]
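# The algorithmic gym environments emit discrete symbol indices as observations,
# so the embedding layer maps each index to a learned 32-dimensional vector
# before the dense layers; `indices` is presumably the lookup-table size (an
# upper bound on the number of distinct observation values), so 100 is a
# generous bound for this environment's small alphabet.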
agent = PPOAgent(
    states_spec=env.states,
    actions_spec=env.actions,
    network_spec=network_spec,
    batch_size=4096,
    # Agent
    preprocessing=None,
    exploration=None,
    reward_preprocessing=None,
    # BatchAgent
    keep_last_timestep=True,
    # PPOAgent
    step_optimizer=dict(
        type='adam',
        learning_rate=1e-3
    ),
    optimization_steps=10,
    # Model
    scope='ppo',
    discount=0.99,
    # DistributionModel
    distributions_spec=None,
    entropy_regularization=0.01,
    # PGModel
    baseline_mode=None,
    baseline=None,
    baseline_optimizer=None,
    gae_lambda=None,
    normalize_rewards=False,
    # PGLRModel
    likelihood_ratio_clipping=0.2,
    summary_spec=None,
    distributed_spec=None
)
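# For reference: `likelihood_ratio_clipping=0.2` is the epsilon in PPO's clipped
# surrogate objective, L(theta) = E[min(r_t * A_t, clip(r_t, 1 - eps, 1 + eps) * A_t)],
# where r_t is the new/old policy probability ratio and A_t the advantage estimate.
# `optimization_steps=10` should mean the Adam step optimizer is applied that many
# times over each collected batch.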
# Create the runner
runner = Runner(agent=agent, environment=env)


# Callback function printing episode statistics
def episode_finished(r):
    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(
        ep=r.episode, ts=r.episode_timestep, reward=r.episode_rewards[-1]))
    return True


# Start learning
runner.run(episodes=10, max_episode_timesteps=200, episode_finished=episode_finished)
# Print statistics
print("Learning finished. Total episodes: {ep}. Average reward of last 100 episodes: {ar}.".format(
    ep=runner.episode,
    ar=np.mean(runner.episode_rewards[-100:]))
)
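# Optional follow-up (illustrative sketch): the Runner accumulates per-episode
# returns in `runner.episode_rewards` (the same list used above), so a quick
# learning-curve plot only needs matplotlib; this is not part of the training
# setup and assumes matplotlib is installed.
import matplotlib.pyplot as plt

plt.plot(runner.episode_rewards)
plt.xlabel('episode')
plt.ylabel('episode reward')
plt.title('PPO on ReversedAddition-v0')
plt.show()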