Created
October 26, 2016 22:47
-
-
Save stefanopalmieri/bb1517522e9d7351fe95633f563a9db3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Using ES-HyperNEAT to try to solve the BipedalWalker environment.
# This attempt was not successful; the hyperparameters likely need adjusting.
# A neural network is trained using NeuroEvolution of Augmenting Topologies (NEAT).
# The idea is from the paper "Evolving Neural Networks through Augmenting Topologies".
# This gist uses MultiNEAT (http://multineat.com/).
import logging | |
import numpy as np | |
import pickle | |
import gym | |
import MultiNEAT as NEAT | |
# NEAT setup
#
# Every hyperparameter of the evolutionary run is stored on one shared
# NEAT.Parameters instance, which is later handed to the genome, the
# population and the phenotype builder.
# NOTE(review): the `Treshold` spelling below is kept exactly as written;
# presumably it matches MultiNEAT's attribute names -- verify before renaming.
params = NEAT.Parameters()

# --- Population and speciation ---
params.PopulationSize = 200
params.DynamicCompatibility = True
params.CompatTreshold = 2.0
params.YoungAgeTreshold = 15
params.SpeciesMaxStagnation = 100
params.OldAgeTreshold = 35
params.MinSpecies = 5
params.MaxSpecies = 10
params.RouletteWheelSelection = False

# --- Structural and weight mutation ---
params.MutateRemLinkProb = 0.02
params.RecurrentProb = 0
params.OverallMutationRate = 0.15
params.MutateAddLinkProb = 0.08
params.MutateAddNeuronProb = 0.01
params.MutateWeightsProb = 0.90
params.MaxWeight = 8.0
params.WeightMutationMaxPower = 0.2
params.WeightReplacementMaxPower = 1.0

# --- Activation parameter "A" (its mutation probability is zero) ---
params.MutateActivationAProb = 0.0
params.ActivationAMutationMaxPower = 0.5
params.MinActivationA = 0.05
params.MaxActivationA = 6.0

# --- Activation function selection ---
# Only tanh, signed step, signed gauss, signed sine and linear are given a
# non-zero probability; every other function is set to 0.0.
params.MutateNeuronActivationTypeProb = 0.03
params.ActivationFunction_SignedSigmoid_Prob = 0.0
params.ActivationFunction_UnsignedSigmoid_Prob = 0.0
params.ActivationFunction_Tanh_Prob = 1.0
params.ActivationFunction_TanhCubic_Prob = 0.0
params.ActivationFunction_SignedStep_Prob = 1.0
params.ActivationFunction_UnsignedStep_Prob = 0.0
params.ActivationFunction_SignedGauss_Prob = 1.0
params.ActivationFunction_UnsignedGauss_Prob = 0.0
params.ActivationFunction_Abs_Prob = 0.0
params.ActivationFunction_SignedSine_Prob = 1.0
params.ActivationFunction_UnsignedSine_Prob = 0.0
params.ActivationFunction_Linear_Prob = 1.0

# --- ES-HyperNEAT settings (read when the phenotype is built) ---
params.DivisionThreshold = 0.5
params.VarianceThreshold = 0.03
params.BandThreshold = 0.3
params.InitialDepth = 2
params.MaxDepth = 3
params.IterationLevel = 1
params.Leo = False
params.GeometrySeed = False
params.LeoSeed = False
params.LeoThreshold = 0.3
params.CPPN_Bias = -1.0
params.Qtree_X = 0.0
params.Qtree_Y = 0.0
params.Width = 1.0
params.Height = 1.0

# --- Elitism ---
params.Elitism = 0.1
# Random number generator; presumably TimeSeed() seeds it from the clock.
# NOTE(review): `rng` is not referenced again in the visible code.
rng = NEAT.RNG()
rng.TimeSeed()

# Substrate input layer: 14 + 10 = 24 nodes, the same count as the 24 inputs
# of the seed genome created in trainNetwork(). The nodes sit on two rows
# (y = -1.0 and y = -0.5) with x spread evenly over [-1, 1].
# The name `input_coords` replaces the original `list`, which shadowed the
# builtin of the same name.
input_coords = [(-1. + (2. * i / 13.), -1., 0.) for i in range(14)]
input_coords += [(-1. + (2. * i / 9), -0.5, 0) for i in range(10)]

# Four output nodes on the row y = 1.0; no hidden nodes are predefined.
output_coords = [(-1., 1., 0.), (-0.5, 1., 0.), (0.5, 1., 0.), (1., 1., 0.)]

substrate = NEAT.Substrate(input_coords, [], output_coords)

# Link permissions. The original set several of these twice with conflicting
# values; each flag is now assigned once, to the value that was effective
# after the second round of assignments.
substrate.m_allow_input_hidden_links = True
substrate.m_allow_input_output_links = False
substrate.m_allow_hidden_hidden_links = True
substrate.m_allow_hidden_output_links = True
substrate.m_allow_output_hidden_links = False
substrate.m_allow_output_output_links = False
substrate.m_allow_looped_hidden_links = True
substrate.m_allow_looped_output_links = False

substrate.m_hidden_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID
# The control loops clip network outputs to [-1, 1] before passing them to the
# environment, so the output nodes need a signed activation. The previous
# UNSIGNED_SIGMOID setting is understood to be bounded to (0, 1), which would
# make negative actions unreachable.
# NOTE(review): confirm the output ranges against the MultiNEAT build in use.
substrate.m_output_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID

substrate.m_with_distance = False
substrate.m_max_weight_and_bias = 8.0
def trainNetwork(env, seed):
    """Evolve an ES-HyperNEAT controller for ``env`` and return it pickled.

    Each genome is turned into a network through the module-level
    ``substrate`` and ``params``, then scored by the reward it accumulates
    over ``episode_count`` episodes of at most ``max_steps`` steps each.

    Args:
        env: Gym-style environment. ``reset()`` must return an observation
            and ``step(action)`` a ``(observation, reward, done, info)``
            tuple.
        seed: Value forwarded as the last argument of ``NEAT.Population``
            (presumably the RNG seed).

    Returns:
        ``pickle.dumps`` of the first genome whose cumulative reward reaches
        ``max_reward``, otherwise of the best genome of the last generation
        that was evaluated.
    """
    # Training parameters
    generationSize = 50
    episode_count = 10
    max_steps = 1000
    # Max reward for environments that reward 1 for each successful step
    # (e.g. CartPole-v0). For other reward schemes this cap may never be
    # reached, in which case all generations are run.
    max_reward = episode_count * max_steps

    def evaluate(genome):
        """Return the total reward ``genome`` collects over all episodes."""
        net = NEAT.NeuralNetwork()
        genome.BuildESHyperNEATPhenotype(net, substrate, params)

        cum_reward = 0
        # `range` instead of `xrange` so the code runs on Python 2 and 3.
        for _episode in range(episode_count):
            ob = env.reset()
            # Clear the network before each episode so that nothing carries
            # over from the previous one.
            net.Flush()

            for _step in range(max_steps):
                # get next action
                net.Input(ob)
                net.Activate()
                action = np.clip(net.Output(), -1, 1)
                ob, reward, done, _info = env.step(action)
                cum_reward += reward
                if done:
                    break

        return cum_reward

    # Create initial genome: 24 inputs and 4 outputs, matching the substrate.
    # NOTE(review): the meaning of the remaining positional arguments should
    # be checked against the NEAT.Genome constructor of the installed build.
    g = NEAT.Genome(0, 24, 0, 4, False,
                    NEAT.ActivationFunction.TANH,
                    NEAT.ActivationFunction.TANH, 0, params)
    pop = NEAT.Population(g, params, True, 1.0, seed)

    current_best = None
    for generation in range(generationSize):
        genomes = NEAT.GetGenomeList(pop)
        for genome in genomes:
            reward = evaluate(genome)

            # `>=` rather than `==`: the reward is an accumulated number, so
            # an exact-equality test can miss a genome that hit the cap.
            if reward >= max_reward:
                return pickle.dumps(genome)

            genome.SetFitness(reward)

        print('Generation: {}, max fitness: {}'.format(
            generation, max(x.GetFitness() for x in genomes)))

        # Snapshot the best genome before Epoch() advances the population.
        # NOTE(review): a comment on the gist page reports that pickling a
        # genome fails on some MultiNEAT builds ("no default reduce due to
        # non-trivial cinit"); verify against the installed version.
        current_best = pickle.dumps(pop.GetBestGenome())
        pop.Epoch()

    return current_best
env_name = "BipedalWalker"

if __name__ == '__main__':
    # Test the algorithm multiple times
    # (`range` instead of `xrange` so the script runs on Python 2 and 3.)
    for test_case in range(0, 1):
        # setup logger, environment and monitor
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

        env = gym.make("%s-v2" % env_name)
        outdir = "/tmp/neat-%s-results-%d" % (env_name, test_case)
        env.monitor.start(outdir, force=True)

        try:
            # Train network; the test case index is passed as the seed.
            learned = trainNetwork(env, test_case)

            # Test trained network on 1000 episodes.
            # `learned` is the pickle produced by trainNetwork() in this same
            # process, so loading it does not involve untrusted data.
            learned_genome = pickle.loads(learned)
            net = NEAT.NeuralNetwork()
            learned_genome.BuildESHyperNEATPhenotype(net, substrate, params)

            episode_count = 1000
            max_steps = 1000
            for _episode in range(episode_count):
                ob = env.reset()
                net.Flush()

                for _step in range(max_steps):
                    # get next action
                    net.Input(ob)
                    net.Activate()
                    action = np.clip(net.Output(), -1, 1)
                    ob, _reward, done, _info = env.step(action)
                    if done:
                        break
        finally:
            # Dump result info to disk, even if training or testing raised,
            # so the monitor is never left open.
            env.monitor.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello. Nice project you have here, congratulations.
I tried to pickle the best genome and got this error: "TypeError: no default reduce due to non-trivial cinit". Could you please help me solve it?
Thank you