tigerneil · May 16, 2016 13:15
diff --git a/rllab_gym_ddpg.py b/rllab_gym_ddpg.py
 # Launcher script: trains rllab's DDPG on the Gym "Pendulum-v0" task.
 from __future__ import absolute_import, print_function

 import os

 # The Theano flags are exported before any rllab module is imported below.
 # NOTE(review): presumably so the backend reads them at import time - confirm.
 os.environ['THEANO_FLAGS'] = 'device=cpu,mode=FAST_COMPILE,optimizer=None'

 from rllab.algos.ddpg import DDPG
 from rllab.envs.box2d.cartpole_env import CartpoleEnv
 from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
 from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
 from rllab.exploration_strategies.ou_strategy import OUStrategy
 from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
 from rllab.envs.gym_env import GymEnv
 from rllab.envs.normalized_env import normalize
 from rllab.misc.instrument import stub, run_experiment_lite

 # NOTE(review): CartpoleEnv, LinearFeatureBaseline, tools and np are not
 # referenced anywhere in the visible script.
 from nose2 import tools
 import numpy as np

 # stub() is handed this module's globals before any rllab object is built.
 # NOTE(review): expected to turn the calls below into recorded stubs that
 # run_experiment_lite executes later - confirm against the rllab docs.
 stub(globals())

 # Gym's Pendulum-v0, passed through rllab's normalize() wrapper.
 env = normalize(GymEnv("Pendulum-v0"))

 # Actor, critic and exploration strategy are all derived from the env spec.
 es = OUStrategy(env.spec)
 qf = ContinuousMLPQFunction(env.spec)
 policy = DeterministicMLPPolicy(env.spec)

 # DDPG hyper-parameters, collected in one place and expanded into the call.
 ddpg_settings = dict(
     n_epochs=10000,
     epoch_length=100,
     batch_size=64,
     min_pool_size=500,
     replay_pool_size=10000,
     eval_samples=100,
 )
 algo = DDPG(env=env, policy=policy, qf=qf, es=es, **ddpg_settings)

 # NOTE(review): plot=True is given to the launcher only; DDPG above is not
 # passed a plot argument - confirm plotting works as intended.
 run_experiment_lite(
     algo.train(),
     # n_parallel (number of parallel sampling workers) is left at its default:
     # n_parallel=1,
     # Keep snapshot parameters for the last iteration only.
     snapshot_mode="last",
     # Fixed experiment seed; without it a random seed would be used.
     seed=1,
     plot=True,
 )
	# Second copy of the launcher: DDPG on the Gym "Pendulum-v0" task via rllab.
	from __future__ import absolute_import
	from __future__ import print_function
	import os

	# Exported ahead of the rllab imports that follow.
	# NOTE(review): presumably Theano reads THEANO_FLAGS when imported - confirm.
	os.environ['THEANO_FLAGS'] = 'device=cpu,mode=FAST_COMPILE,optimizer=None'

	from rllab.algos.ddpg import DDPG
	from rllab.envs.box2d.cartpole_env import CartpoleEnv
	from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
	from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
	from rllab.exploration_strategies.ou_strategy import OUStrategy
	from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
	from rllab.envs.gym_env import GymEnv
	from rllab.envs.normalized_env import normalize
	from rllab.misc.instrument import stub, run_experiment_lite

	# NOTE(review): tools and np (like CartpoleEnv and LinearFeatureBaseline
	# above) are not used by the statements visible in this script.
	from nose2 import tools
	import numpy as np

	# Called with the module globals before any rllab object is constructed.
	stub(globals())

	# Environment: the named Gym task wrapped by normalize().
	gym_task = GymEnv("Pendulum-v0")
	env = normalize(gym_task)

	# Components built from the environment spec.
	policy = DeterministicMLPPolicy(env.spec)
	qf = ContinuousMLPQFunction(env.spec)
	es = OUStrategy(env.spec)

	# Training schedule and replay-pool sizes handed to DDPG.
	schedule = dict(n_epochs=10000, epoch_length=100, batch_size=64)
	pool_and_eval = dict(
	    min_pool_size=500,
	    replay_pool_size=10000,
	    eval_samples=100,
	)
	schedule.update(pool_and_eval)

	algo = DDPG(
	    env=env,
	    policy=policy,
	    qf=qf,
	    es=es,
	    **schedule
	)

	# Launcher options: snapshot only the last iteration, fix the seed to 1 and
	# request plotting. n_parallel is not passed (it was commented out).
	launch_options = dict(
	    snapshot_mode="last",
	    seed=1,
	    plot=True,
	)
	run_experiment_lite(algo.train(), **launch_options)
No results found