@stefanopalmieri
Created October 29, 2016 17:06
# A neural network is trained using ES-HyperNEAT.
# NEAT itself was introduced in "Evolving Neural Networks through Augmenting
# Topologies" (Stanley & Miikkulainen, 2002); the ES-HyperNEAT extension used
# here is from "An Enhanced Hypercube-Based Encoding for Evolving the Placement,
# Density, and Connectivity of Neurons" (Risi & Stanley, 2012).
# This gist uses MultiNEAT (http://multineat.com/)
import logging
import numpy as np
import pickle
import gym
import MultiNEAT as NEAT
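# Note: this script targets the 2016-era APIs it was written against --
# MultiNEAT's Python bindings (which expose BuildESHyperNEATPhenotype) and an
# old gym release whose env.monitor API predates gym.wrappers.Monitor. Newer
# versions of either library may require small adjustments.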
# NEAT setup
params = NEAT.Parameters()
params.PopulationSize = 20
params.DynamicCompatibility = True
params.CompatTreshold = 2.0  # (sic: "Treshold" is MultiNEAT's own spelling)
params.YoungAgeTreshold = 15
params.SpeciesMaxStagnation = 100
params.OldAgeTreshold = 35
params.MinSpecies = 5
params.MaxSpecies = 10
params.RouletteWheelSelection = False
params.MutateRemLinkProb = 0.02
params.RecurrentProb = 0
params.OverallMutationRate = 0.15
params.MutateAddLinkProb = 0.08
params.MutateAddNeuronProb = 0.01
params.MutateWeightsProb = 0.90
params.MaxWeight = 8.0
params.WeightMutationMaxPower = 0.2
params.WeightReplacementMaxPower = 1.0
params.MutateActivationAProb = 0.0
params.ActivationAMutationMaxPower = 0.5
params.MinActivationA = 0.05
params.MaxActivationA = 6.0
params.MutateNeuronActivationTypeProb = 0.03
# CPPN activation-function probabilities (relative weights)
params.ActivationFunction_SignedSigmoid_Prob = 0.0
params.ActivationFunction_UnsignedSigmoid_Prob = 0.0
params.ActivationFunction_Tanh_Prob = 1.0
params.ActivationFunction_TanhCubic_Prob = 0.0
params.ActivationFunction_SignedStep_Prob = 1.0
params.ActivationFunction_UnsignedStep_Prob = 0.0
params.ActivationFunction_SignedGauss_Prob = 1.0
params.ActivationFunction_UnsignedGauss_Prob = 0.0
params.ActivationFunction_Abs_Prob = 0.0
params.ActivationFunction_SignedSine_Prob = 1.0
params.ActivationFunction_UnsignedSine_Prob = 0.0
params.ActivationFunction_Linear_Prob = 1.0
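# ES-HyperNEAT-specific parameters (see Risi & Stanley). Roughly: the substrate
# is recursively divided by a quadtree; DivisionThreshold and VarianceThreshold
# decide when a region is subdivided further, BandThreshold prunes connections
# that do not sit in an information "band", InitialDepth/MaxDepth bound the
# quadtree resolution, and the Leo* settings toggle Link Expression Output.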
params.DivisionThreshold = 0.5
params.VarianceThreshold = 0.03
params.BandThreshold = 0.3
params.InitialDepth = 2
params.MaxDepth = 3
params.IterationLevel = 1
params.Leo = False
params.GeometrySeed = False
params.LeoSeed = False
params.LeoThreshold = 0.3
params.CPPN_Bias = -1.0
params.Qtree_X = 0.0
params.Qtree_Y = 0.0
params.Width = 1.
params.Height = 1.
params.Elitism = 0.1
rng = NEAT.RNG()
rng.TimeSeed()
# Input-neuron coordinates on the substrate: four observation inputs spread
# along the bottom edge (y = -1). Renamed from "list" to avoid shadowing the builtin.
input_coords = []
for i in range(4):
    input_coords.append((-1. + (2. * i / 3.), -1., 0.))
print(input_coords)
#for i in range(0, 10):
#    input_coords.append((-1. + (2. * i / 9.), -0.5, 0.))
# append bias input
input_coords.append((0., -0.8, 0.))
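# Substrate layout: inputs along the bottom edge (bias slightly above, at
# y = -0.8), an empty hidden-node list (ES-HyperNEAT places hidden neurons
# itself), and two output neurons at y = 1, one per CartPole discrete action.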
substrate = NEAT.Substrate(input_coords,
                           [],
                           [(-1., 1., 0.), (1., 1., 0.)])
substrate.m_allow_input_hidden_links = True
substrate.m_allow_input_output_links = True
substrate.m_allow_hidden_hidden_links = True
substrate.m_allow_hidden_output_links = True
substrate.m_allow_output_hidden_links = True
substrate.m_allow_output_output_links = False
substrate.m_allow_looped_hidden_links = True
substrate.m_allow_looped_output_links = True
substrate.m_hidden_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID
substrate.m_output_nodes_activation = NEAT.ActivationFunction.UNSIGNED_SIGMOID
substrate.m_with_distance = False
substrate.m_max_weight_and_bias = 8.0
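# For each genome, BuildESHyperNEATPhenotype (used in evaluate() below) queries
# the genome's CPPN across this substrate, placing hidden nodes according to
# the quadtree parameters above, and emits a concrete NEAT.NeuralNetwork.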
def trainNetwork(env, seed):
    # Training parameters
    generationSize = 30
    episode_count = 10
    max_steps = 475
    # Max total reward for environments that give +1 per successful step
    # (e.g. CartPole-v1): 10 episodes * 475 steps = 4750
    max_reward = episode_count * max_steps
    def evaluate(genome):
        net = NEAT.NeuralNetwork()
        genome.BuildESHyperNEATPhenotype(net, substrate, params)
        # genome.BuildPhenotype(net)  # plain NEAT alternative
        cum_reward = 0
        for i in range(episode_count):
            ob = env.reset()
            net.Flush()
            for j in range(max_steps):
                # get next action; append the bias input for ES-HyperNEAT
                ob = np.append(ob, [1.])
                net.Input(ob)
                net.Activate()
                o = net.Output()
                # action = np.clip(o, -1, 1)  # continuous-action variant
                action = np.argmax(o)  # pick the output neuron with the highest activation
                ob, reward, done, _ = env.step(action)
                cum_reward += reward
                if done:
                    break
        return cum_reward
    # Create initial genome: id 0, 5 inputs (4 observations + bias),
    # 0 hidden nodes, 2 outputs
    g = NEAT.Genome(0, 5, 0, 2, False,
                    NEAT.ActivationFunction.TANH, NEAT.ActivationFunction.TANH,
                    0, params)
    pop = NEAT.Population(g, params, True, 1.0, seed)
    current_best = None
    for generation in range(generationSize):
        for i_episode, genome in enumerate(NEAT.GetGenomeList(pop)):
            reward = evaluate(genome)
            if reward == max_reward:
                # Perfect score: stop early and return this genome
                return pickle.dumps(genome)
            genome.SetFitness(reward)
        print('Generation: {}, max fitness: {}'.format(
            generation, max(x.GetFitness() for x in NEAT.GetGenomeList(pop))))
        current_best = pickle.dumps(pop.GetBestGenome())
        pop.Epoch()
    return current_best
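# trainNetwork returns a pickled genome: either the first one to reach
# max_reward, or the best genome of the final generation. A minimal sketch
# (not in the original gist) of persisting that result, reusing the same
# pickle round-trip the script already relies on:
def save_genome(pickled_genome, path):
    # Hypothetical helper: write the pickled genome bytes to disk so a later
    # run can rebuild the phenotype with BuildESHyperNEATPhenotype without
    # retraining.
    with open(path, "wb") as f:
        f.write(pickled_genome)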
env_name = "CartPole"
if __name__ == '__main__':
    # Test the algorithm multiple times
    for test_case in range(0, 1):
        # setup logger, environment and monitor
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        env = gym.make("%s-v1" % env_name)
        outdir = "/tmp/neat-%s-results-%d" % (env_name, test_case)
        # old gym monitor API; newer gym versions use gym.wrappers.Monitor
        env.monitor.start(outdir, force=True)
        # Train network
        learned = trainNetwork(env, test_case)
        # Test trained network on 1000 episodes
        learned_genome = pickle.loads(learned)
        net = NEAT.NeuralNetwork()
        learned_genome.BuildESHyperNEATPhenotype(net, substrate, params)
        # learned_genome.BuildPhenotype(net)
        episode_count = 1000
        max_steps = 475
        for i in range(episode_count):
            ob = env.reset()
            net.Flush()
            for j in range(max_steps):
                # get next action; append the bias input for ES-HyperNEAT
                ob = np.append(ob, [1.])
                net.Input(ob)
                net.Activate()
                o = net.Output()
                # action = np.clip(o, -1, 1)
                action = np.argmax(o)
                ob, reward, done, _ = env.step(action)
                if done:
                    break
        # Dump result info to disk
        env.monitor.close()