@pranz24
Last active November 18, 2022 02:45
Solving LunarLander-v2 using CMA-ES in Pybrain
# A small neural network is trained using the Covariance Matrix Adaptation Evolution Strategy (CMA-ES).
# The idea is from this paper: https://arxiv.org/abs/1604.00772
# This gist uses pybrain (http://pybrain.org/)
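# In short, CMA-ES maintains a multivariate Gaussian over the network's weight vector;
# each generation it samples a population of candidate weight vectors, scores them with
# the fitness function below, and shifts the mean and covariance toward the better samples.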
import logging
import os
import numpy as np
import gym
from gym import wrappers
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import TanhLayer
#from pybrain.structure.modules import LinearLayer
#from pybrain.structure.modules import ReluLayer
from pybrain.optimization import CMAES
# Building a simple neural network with two hidden layers (16 and 12 units)
def buildNet(n_input, n_output):
    # The algorithm also converges using linear and relu layers at the hidden or output unit.
    return buildNetwork(n_input, 16, 12, n_output, hiddenclass=TanhLayer,
                        outclass=TanhLayer, outputbias=False, recurrent=False)
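# The network maps LunarLander's 8-dimensional observation to one output per discrete
# action (4 for LunarLander-v2); the action executed is the argmax over those outputs.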
def trainNetwork(env):
    # The training function uses the average per-episode cumulative reward plus the
    # average per-episode maximum of ob[0] as the fitness.
    def objF(params):
        nn = buildNet(len(env.observation_space.high), env.action_space.n)
        nn._setParameters(np.array(params))
        cum_reward = 0
        highest_count = []
        episode_count = 15
        max_steps = 1200
        for i in range(episode_count):
            highest = -1
            ob = env.reset()
            for j in range(max_steps):
                result = nn.activate(ob)
                action = np.argmax(result)
                ob, reward, done, _ = env.step(action)
                cum_reward += reward
                if highest < ob[0]:
                    highest = ob[0]
                if done:
                    break
            nn.reset()
            highest_count.append(highest)
        return (sum(highest_count) + cum_reward) / len(highest_count)
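    # CMAES treats objF as a black box: each candidate weight vector it samples is
    # scored by rolling out 15 full episodes with those weights.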
    # Build net for initial random params
    n = buildNet(len(env.observation_space.high), env.action_space.n)
    x0 = n.params
    l = CMAES(objF, x0, verbose=True)
    # Some arbitrary desired fitness value
    l.desiredEvaluation = -3600
    l.maxLearningSteps = 200
    l.mustMaximize = True
    l.minimize = False
    learned = l.learn()
    return learned
if __name__ == '__main__':
    # Initialization
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    env = gym.make('LunarLander-v2')
    print(env.observation_space)
    print(env.action_space)
    outdir = '/tmp/CMAES-LunarLander-results'
    env = gym.wrappers.Monitor(env, outdir, force=True)
    learned = trainNetwork(env)
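    # learn() returns a (best parameters, best fitness) pair, so learned[0]
    # holds the evolved weight vector.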
    nn = buildNet(len(env.observation_space.high), env.action_space.n)
    nn._setParameters(np.array(learned[0]))
    env.close()
    # Test the trained network
    episode_count = 300
    max_steps = 1200
    for i in range(episode_count):
        ob = env.reset()
        for j in range(max_steps):
            result = nn.activate(ob)
            action = np.argmax(result)
            ob, reward, done, _ = env.step(action)
            if done:
                nn.reset()
                break