# Copyright 2017 reinforce.io. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np

from osim.env import RunEnv
from tensorforce import Configuration
from tensorforce.agents import TRPOAgent
from tensorforce.core.networks import layered_network_builder
from tensorforce.execution import Runner

from tensor_force_env import TensorForceEnv

env = RunEnv(visualize=False)
env = TensorForceEnv(env)

print("env.states: %s" % env.states)
print("env.actions: %s" % env.actions)
agent = TRPOAgent(config=Configuration(
    loglevel='info',
    batch_size=100,
    baseline=dict(
        type='mlp',
        size=64,
        hidden_layers=2,
        epochs=20,
        update_batch_size=64,
    ),
    generalized_advantage_estimation=True,
    normalize_advantage=False,
    gae_lambda=0.97,
    max_kl_divergence=0.005,
    cg_iterations=20,
    cg_damping=0.01,
    ls_max_backtracks=20,
    ls_override=False,
    states=env.states,
    actions=env.actions,
    network=layered_network_builder([
        dict(type='dense', size=64, activation='relu'),
        dict(type='dense', size=64, activation='relu'),
    ])
))
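# Added commentary (not in the original gist): batch_size controls how much
# experience the agent gathers before each TRPO update, and the 'mlp' baseline
# configures a small value network used with generalized advantage estimation
# (gae_lambda=0.97) to reduce the variance of the policy-gradient estimate.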
runner = Runner(agent=agent, environment=env)


def episode_finished(r):
    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(
        ep=r.episode,
        ts=r.timestep,
        reward=r.episode_rewards[-1]
    ))
    return True


runner.run(episodes=1000, max_timesteps=200, episode_finished=episode_finished)

print("Learning finished. Total episodes: {ep}. Average reward of last 100 episodes: {ar}.".format(
    ep=runner.episode,
    ar=np.mean(runner.episode_rewards[-100:])
))
# tensor_force_env.py: the wrapper module imported by the training script above.
import gym

from tensorforce.environments import Environment


class TensorForceEnv(Environment):
    """Adapts the osim-rl RunEnv to the TensorForce Environment interface."""

    def __init__(self, run_env):
        self.run_env = run_env

    def __str__(self):
        return str(self.run_env)

    def close(self):
        self.run_env.close()

    def reset(self, difficulty=0):
        # Return the initial observation so the Runner receives a starting state.
        return self.run_env.reset(difficulty)

    def execute(self, action):
        observation, reward, done, info = self.run_env.step(action)
        return observation, reward, done
    @property
    def states(self):
        return TensorForceEnv.state_from_space(self.run_env.observation_space)

    @property
    def actions(self):
        return TensorForceEnv.action_from_space(self.run_env.action_space)

    @staticmethod
    def state_from_space(space):
        if isinstance(space, gym.spaces.Discrete):
            return dict(shape=(), type='int')
        elif isinstance(space, gym.spaces.MultiBinary):
            return dict(shape=space.n, type='int')
        elif isinstance(space, gym.spaces.MultiDiscrete):
            return dict(shape=space.num_discrete_space, type='int')
        elif isinstance(space, gym.spaces.Box):
            return dict(shape=tuple(space.shape), type='float')
        elif isinstance(space, gym.spaces.Tuple):
            states = dict()
            n = 0
            for space in space.spaces:
                state = TensorForceEnv.state_from_space(space)
                if 'type' in state:
                    states['state{}'.format(n)] = state
                    n += 1
                else:
                    for state in state.values():
                        states['state{}'.format(n)] = state
                        n += 1
            return states
        else:
            raise RuntimeError('Unknown Gym space.')
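    # Illustrative note (not in the original gist): a Tuple space is flattened
    # into numbered entries, e.g.
    #   state_from_space(Tuple((Discrete(3), Box(0.0, 1.0, shape=(2,)))))
    #   -> {'state0': {'shape': (), 'type': 'int'},
    #       'state1': {'shape': (2,), 'type': 'float'}}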
    @staticmethod
    def action_from_space(space):
        if isinstance(space, gym.spaces.Discrete):
            return dict(continuous=False, num_actions=space.n)
        elif isinstance(space, gym.spaces.MultiBinary):
            return dict(continuous=False, num_actions=2, shape=space.n)
        elif isinstance(space, gym.spaces.MultiDiscrete):
            if (space.low == space.low[0]).all() and (space.high == space.high[0]).all():
                return dict(continuous=False, num_actions=(space.high[0] - space.low[0]), shape=space.num_discrete_space)
            else:
                actions = dict()
                for n in range(space.num_discrete_space):
                    actions['action{}'.format(n)] = dict(continuous=False, num_actions=(space.high[n] - space.low[n]))
                return actions
        elif isinstance(space, gym.spaces.Box):
            if (space.low == space.low[0]).all() and (space.high == space.high[0]).all():
                return dict(continuous=True, shape=space.low.shape, min_value=space.low[0], max_value=space.high[0])
            else:
                actions = dict()
                low = space.low.flatten()
                high = space.high.flatten()
                for n in range(low.shape[0]):
                    actions['action{}'.format(n)] = dict(continuous=True, min_value=low[n], max_value=high[n])
                return actions
        elif isinstance(space, gym.spaces.Tuple):
            actions = dict()
            n = 0
            for space in space.spaces:
                action = TensorForceEnv.action_from_space(space)
                if 'continuous' in action:
                    actions['action{}'.format(n)] = action
                    n += 1
                else:
                    for action in action.values():
                        actions['action{}'.format(n)] = action
                        n += 1
            return actions
        else:
            raise RuntimeError('Unknown Gym space.')
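

# Illustrative sanity check (assumed usage, not part of the original gist):
# exercise the space converters on plain Gym spaces to see the specs that
# TensorForce will receive from the wrapper.
if __name__ == '__main__':
    box = gym.spaces.Box(low=-1.0, high=1.0, shape=(3,))
    print(TensorForceEnv.state_from_space(box))
    # expected: {'shape': (3,), 'type': 'float'}
    print(TensorForceEnv.action_from_space(box))
    # expected: continuous spec with shape (3,), min_value -1.0, max_value 1.0
    print(TensorForceEnv.action_from_space(gym.spaces.Discrete(4)))
    # expected: {'continuous': False, 'num_actions': 4}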