# Copyright 2017 reinforce.io. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np

from osim.env import RunEnv
from tensorforce import Configuration
from tensorforce.agents import TRPOAgent
from tensorforce.core.networks import layered_network_builder
from tensorforce.execution import Runner

from tensor_force_env import TensorForceEnv

env = RunEnv(visualize=False)
env = TensorForceEnv(env)

print("env.states: %s" % env.states)
print("env.actions: %s" % env.actions)
agent = TRPOAgent(config=Configuration(
    loglevel='info',
    batch_size=100,
    baseline=dict(
        type='mlp',
        size=64,
        hidden_layers=2,
        epochs=20,
        update_batch_size=64,
    ),
    generalized_advantage_estimation=True,
    normalize_advantage=False,
    gae_lambda=0.97,
    max_kl_divergence=0.005,
    cg_iterations=20,
    cg_damping=0.01,
    ls_max_backtracks=20,
    ls_override=False,
    states=env.states,
    actions=env.actions,
    network=layered_network_builder([
        dict(type='dense', size=64, activation='relu'),
        dict(type='dense', size=64, activation='relu'),
    ])
))
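# Added commentary (not in the original gist): batch_size controls how much
# experience the agent gathers before each TRPO update, and the 'mlp' baseline
# configures a small value network used with generalized advantage estimation
# (gae_lambda=0.97) to reduce the variance of the policy-gradient estimate.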
runner = Runner(agent=agent, environment=env)


def episode_finished(r):
    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(
        ep=r.episode,
        ts=r.timestep,
        reward=r.episode_rewards[-1]
    ))
    return True


runner.run(episodes=1000, max_timesteps=200, episode_finished=episode_finished)

print("Learning finished. Total episodes: {ep}. Average reward of last 100 episodes: {ar}.".format(
    ep=runner.episode,
    ar=np.mean(runner.episode_rewards[-100:])
))
# tensor_force_env.py: the wrapper module imported by the training script above.
import gym

from tensorforce.environments import Environment


class TensorForceEnv(Environment):
    """Adapts the osim-rl RunEnv to the TensorForce Environment interface."""

    def __init__(self, run_env):
        self.run_env = run_env

    def __str__(self):
        return str(self.run_env)

    def close(self):
        self.run_env.close()

    def reset(self, difficulty=0):
        # Return the initial observation so the Runner receives a starting state.
        return self.run_env.reset(difficulty)

    def execute(self, action):
        observation, reward, done, info = self.run_env.step(action)
        return observation, reward, done
    @property
    def states(self):
        return TensorForceEnv.state_from_space(self.run_env.observation_space)

    @property
    def actions(self):
        return TensorForceEnv.action_from_space(self.run_env.action_space)

    @staticmethod
    def state_from_space(space):
        if isinstance(space, gym.spaces.Discrete):
            return dict(shape=(), type='int')
        elif isinstance(space, gym.spaces.MultiBinary):
            return dict(shape=space.n, type='int')
        elif isinstance(space, gym.spaces.MultiDiscrete):
            return dict(shape=space.num_discrete_space, type='int')
        elif isinstance(space, gym.spaces.Box):
            return dict(shape=tuple(space.shape), type='float')
        elif isinstance(space, gym.spaces.Tuple):
            states = dict()
            n = 0
            for space in space.spaces:
                state = TensorForceEnv.state_from_space(space)
                if 'type' in state:
                    states['state{}'.format(n)] = state
                    n += 1
                else:
                    for state in state.values():
                        states['state{}'.format(n)] = state
                        n += 1
            return states
        else:
            raise RuntimeError('Unknown Gym space.')
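    # Illustrative note (not in the original gist): a Tuple space is flattened
    # into numbered entries, e.g.
    #   state_from_space(Tuple((Discrete(3), Box(0.0, 1.0, shape=(2,)))))
    #   -> {'state0': {'shape': (), 'type': 'int'},
    #       'state1': {'shape': (2,), 'type': 'float'}}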
    @staticmethod
    def action_from_space(space):
        if isinstance(space, gym.spaces.Discrete):
            return dict(continuous=False, num_actions=space.n)
        elif isinstance(space, gym.spaces.MultiBinary):
            return dict(continuous=False, num_actions=2, shape=space.n)
        elif isinstance(space, gym.spaces.MultiDiscrete):
            if (space.low == space.low[0]).all() and (space.high == space.high[0]).all():
                return dict(continuous=False, num_actions=(space.high[0] - space.low[0]), shape=space.num_discrete_space)
            else:
                actions = dict()
                for n in range(space.num_discrete_space):
                    actions['action{}'.format(n)] = dict(continuous=False, num_actions=(space.high[n] - space.low[n]))
                return actions
        elif isinstance(space, gym.spaces.Box):
            if (space.low == space.low[0]).all() and (space.high == space.high[0]).all():
                return dict(continuous=True, shape=space.low.shape, min_value=space.low[0], max_value=space.high[0])
            else:
                actions = dict()
                low = space.low.flatten()
                high = space.high.flatten()
                for n in range(low.shape[0]):
                    actions['action{}'.format(n)] = dict(continuous=True, min_value=low[n], max_value=high[n])
                return actions
        elif isinstance(space, gym.spaces.Tuple):
            actions = dict()
            n = 0
            for space in space.spaces:
                action = TensorForceEnv.action_from_space(space)
                if 'continuous' in action:
                    actions['action{}'.format(n)] = action
                    n += 1
                else:
                    for action in action.values():
                        actions['action{}'.format(n)] = action
                        n += 1
            return actions
        else:
            raise RuntimeError('Unknown Gym space.')
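

# Illustrative sanity check (assumed usage, not part of the original gist):
# exercise the space converters on plain Gym spaces to see the specs that
# TensorForce will receive from the wrapper.
if __name__ == '__main__':
    box = gym.spaces.Box(low=-1.0, high=1.0, shape=(3,))
    print(TensorForceEnv.state_from_space(box))
    # expected: {'shape': (3,), 'type': 'float'}
    print(TensorForceEnv.action_from_space(box))
    # expected: continuous spec with shape (3,), min_value -1.0, max_value 1.0
    print(TensorForceEnv.action_from_space(gym.spaces.Discrete(4)))
    # expected: {'continuous': False, 'num_actions': 4}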