Solving LunarLander-v2 using CMA-ES in PyBrain
# A small neural network is trained using the Covariance Matrix Adaptation
# Evolution Strategy (CMA-ES).
# The idea is from this paper: https://arxiv.org/abs/1604.00772
# This gist uses pybrain (http://pybrain.org/)
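# In short: CMA-ES samples a population of parameter vectors from a
# multivariate Gaussian, ranks them by fitness, and shifts the Gaussian's
# mean and covariance toward the best-ranked samples each generation.
# It needs no gradients, which is why it works with episodic rewards here.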
import logging
import numpy as np
import gym
from gym import wrappers
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import TanhLayer
#from pybrain.structure.modules import LinearLayer
#from pybrain.structure.modules import ReluLayer
from pybrain.optimization import CMAES
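# Note: this gist targets the older Gym API, where env.step() returns a
# (observation, reward, done, info) 4-tuple and the Monitor wrapper is
# still available (both changed in later Gym releases).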
# Building a simple neural network with two hidden layers
def buildNet(n_input, n_output):
    # The algorithm also converges with linear and ReLU layers at the hidden or output units.
    return buildNetwork(n_input, 16, 12, n_output, hiddenclass=TanhLayer, outclass=TanhLayer, outputbias=False, recurrent=False)
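# For LunarLander-v2 (8 observations, 4 actions) this gives roughly 400
# free parameters (8*16 + 16*12 + 12*4 weights plus hidden-layer biases),
# small enough for CMA-ES to maintain a full covariance matrix over them.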
def trainNetwork(env):
    # Fitness for one candidate: the per-episode average of cumulative
    # reward plus the maximum height reached during the episode.
    def objF(params):
        nn = buildNet(len(env.observation_space.high), env.action_space.n)
        nn._setParameters(np.array(params))
        cum_reward = 0
        highest_count = []
        episode_count = 15
        max_steps = 1200
        for i in range(episode_count):
            highest = -1
            ob = env.reset()
            for j in range(max_steps):
                result = nn.activate(ob)
                # Greedy action: the output unit with the largest activation
                action = np.argmax(result)
                ob, reward, done, _ = env.step(action)
                cum_reward += reward
                # ob[1] is the lander's vertical (y) coordinate
                if highest < ob[1]:
                    highest = ob[1]
                if done:
                    break
            nn.reset()
            highest_count.append(highest)
        return (sum(highest_count) + cum_reward) / len(highest_count)
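    # Note: this fitness is stochastic -- each candidate is scored on 15
    # fresh episodes, which reduces (but does not remove) evaluation noise.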
    # Build a net once just to get an initial random parameter vector
    n = buildNet(len(env.observation_space.high), env.action_space.n)
    x0 = n.params
    l = CMAES(objF, x0, verbose=True)
    # Some arbitrary desired fitness value
    l.desiredEvaluation = -3600
    l.maxLearningSteps = 200
    # PyBrain optimizers minimize by default; tell it to maximize
    l.minimize = False
    # learn() returns a (best parameters, best fitness) pair
    learned = l.learn()
    return learned
if __name__ == '__main__':
    # Initialization
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    env = gym.make('LunarLander-v2')
    print(env.observation_space)
    print(env.action_space)
    outdir = '/tmp/CMAES-LunarLander-results'
    env = wrappers.Monitor(env, outdir, force=True)
    learned = trainNetwork(env)
    nn = buildNet(len(env.observation_space.high), env.action_space.n)
    nn._setParameters(np.array(learned[0]))
    # Test the trained network
    episode_count = 300
    max_steps = 1200
    for i in range(episode_count):
        ob = env.reset()
        for j in range(max_steps):
            result = nn.activate(ob)
            action = np.argmax(result)
            ob, reward, done, _ = env.step(action)
            if done:
                nn.reset()
                break
    # Close the environment only after the test rollouts are done
    env.close()
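    # To watch the agent, you could call env.render() inside the test loop;
    # the Monitor wrapper also records videos of some episodes to outdir.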