araffin · September 18, 2018 09:29
diff --git a/custom_policy.py b/custom_policy.py
 from stable_baselines.common.policies import FeedForwardPolicy
 from stable_baselines import A2C

 # Custom MLP policy of three layers of size 128 each
 class CustomPolicy(FeedForwardPolicy):
     """Feed-forward policy pinned to an MLP with three layers of 128 units."""

     def __init__(self, *args, **kwargs):
         # Caller-supplied arguments are forwarded untouched; only the
         # architecture keywords are fixed by this subclass.
         hidden_sizes = [128, 128, 128]
         super().__init__(*args, **kwargs,
                          layers=hidden_sizes,
                          feature_extraction="mlp")

 # Build an A2C model that uses CustomPolicy as its policy class.
 # NOTE(review): 'LunarLander-v2' is presumably an environment id resolved by
 # the library -- confirm against the A2C constructor documentation.
 model = A2C(CustomPolicy, 'LunarLander-v2', verbose=1)
 # Train the agent with a budget of 100000 timesteps
 model.learn(total_timesteps=100000)
	from stable_baselines.common.policies import FeedForwardPolicy
	from stable_baselines import A2C

	# Custom MLP policy of three layers of size 128 each
	class CustomPolicy(FeedForwardPolicy):
	    """Feed-forward policy pinned to an MLP with three layers of 128 units.

	    All positional and keyword arguments given to the constructor are
	    forwarded to ``FeedForwardPolicy``; this subclass only adds the fixed
	    ``layers`` and ``feature_extraction`` keywords.
	    """
	    def __init__(self, *args, **kwargs):
	        # The star-unpacking is essential: without it the caller's
	        # arguments would be collapsed into a single positional value
	        # instead of being passed through to the base constructor.
	        super(CustomPolicy, self).__init__(*args, **kwargs,
	                                           layers=[128, 128, 128],
	                                           feature_extraction="mlp")

	# Build an A2C model that uses CustomPolicy as its policy class.
	# NOTE(review): 'LunarLander-v2' is presumably an environment id resolved by
	# the library -- confirm against the A2C constructor documentation.
	model = A2C(CustomPolicy, 'LunarLander-v2', verbose=1)
	# Train the agent with a budget of 100000 timesteps
	model.learn(total_timesteps=100000)
No results found