# A neural network is trained with ES-HyperNEAT to solve the CartPole balancing task.
# The underlying NEAT algorithm is from the paper "Evolving Neural Networks through
# Augmenting Topologies" (Stanley & Miikkulainen); ES-HyperNEAT extends it with an
# evolvable-substrate HyperNEAT encoding (Risi & Stanley).
# This gist uses MultiNEAT (http://multineat.com/)
import logging
import pickle

import gym
import numpy as np

import MultiNEAT as NEAT
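# Note: this script targets Python 2 (xrange) and the 2016-era OpenAI Gym API
# (env.monitor, 4-tuple env.step() return); later gym releases changed or removed both.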
# NEAT setup
params = NEAT.Parameters()
params.PopulationSize = 20
params.DynamicCompatibility = True
params.CompatTreshold = 2.0
params.YoungAgeTreshold = 15
params.SpeciesMaxStagnation = 100
params.OldAgeTreshold = 35
params.MinSpecies = 5
params.MaxSpecies = 10
params.RouletteWheelSelection = False
params.MutateRemLinkProb = 0.02
params.RecurrentProb = 0
params.OverallMutationRate = 0.15
params.MutateAddLinkProb = 0.08
params.MutateAddNeuronProb = 0.01
params.MutateWeightsProb = 0.90
params.MaxWeight = 8.0
params.WeightMutationMaxPower = 0.2
params.WeightReplacementMaxPower = 1.0
params.MutateActivationAProb = 0.0
params.ActivationAMutationMaxPower = 0.5
params.MinActivationA = 0.05
params.MaxActivationA = 6.0
params.MutateNeuronActivationTypeProb = 0.03
params.ActivationFunction_SignedSigmoid_Prob = 0.0
params.ActivationFunction_UnsignedSigmoid_Prob = 0.0
params.ActivationFunction_Tanh_Prob = 1.0
params.ActivationFunction_TanhCubic_Prob = 0.0
params.ActivationFunction_SignedStep_Prob = 1.0
params.ActivationFunction_UnsignedStep_Prob = 0.0
params.ActivationFunction_SignedGauss_Prob = 1.0
params.ActivationFunction_UnsignedGauss_Prob = 0.0
params.ActivationFunction_Abs_Prob = 0.0
params.ActivationFunction_SignedSine_Prob = 1.0
params.ActivationFunction_UnsignedSine_Prob = 0.0
params.ActivationFunction_Linear_Prob = 1.0
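# ES-HyperNEAT-specific parameters: the division/variance/band thresholds and depth
# limits below control the quadtree search that decides where hidden neurons are
# placed on the substrate and how densely it is connected.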
params.DivisionThreshold = 0.5
params.VarianceThreshold = 0.03
params.BandThreshold = 0.3
params.InitialDepth = 2
params.MaxDepth = 3
params.IterationLevel = 1
params.Leo = False
params.GeometrySeed = False
params.LeoSeed = False
params.LeoThreshold = 0.3
params.CPPN_Bias = -1.0
params.Qtree_X = 0.0
params.Qtree_Y = 0.0
params.Width = 1.
params.Height = 1.
params.Elitism = 0.1

rng = NEAT.RNG()
rng.TimeSeed()
input_coords = []
for i in range(0, 4):
    input_coords.append((-1. + (2. * i / 3.), -1., 0.))
print(input_coords)
# for i in range(0, 10):
#     input_coords.append((-1. + (2. * i / 9), -0.5, 0))
# append bias input
input_coords.append((0., -0.8, 0.))
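# ES-HyperNEAT substrate: the four CartPole observation inputs plus a bias node along
# the bottom of the unit square, no predefined hidden nodes (hidden neurons are
# discovered by the evolvable-substrate search), and two output nodes (one per
# discrete action) along the top.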
substrate = NEAT.Substrate(input_coords,
                           [],
                           [(-1., 1., 0.), (1., 1., 0.)])
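# Substrate connectivity flags: recurrent/looped links are permitted for hidden and
# output neurons; direct output-to-output links are not.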
substrate.m_allow_input_hidden_links = True
substrate.m_allow_input_output_links = True
substrate.m_allow_hidden_hidden_links = True
substrate.m_allow_hidden_output_links = True
substrate.m_allow_output_hidden_links = True
substrate.m_allow_output_output_links = False
substrate.m_allow_looped_hidden_links = True
substrate.m_allow_looped_output_links = True
substrate.m_hidden_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID
substrate.m_output_nodes_activation = NEAT.ActivationFunction.UNSIGNED_SIGMOID
substrate.m_with_distance = False
substrate.m_max_weight_and_bias = 8.0


def trainNetwork(env, seed):
    # Training parameters
    generationSize = 30
    episode_count = 10
    max_steps = 475  # matches the CartPole-v1 reward threshold
    # Max reward for environments that give a reward of 1 for each successful step (e.g. CartPole)
    max_reward = episode_count * max_steps
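    # Fitness of a genome = total reward accumulated over `episode_count` episodes,
    # each capped at `max_steps`, using the substrate network decoded from the genome.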
    def evaluate(genome):
        net = NEAT.NeuralNetwork()
        genome.BuildESHyperNEATPhenotype(net, substrate, params)
        # genome.BuildPhenotype(net)
        cum_reward = 0
        for i in xrange(episode_count):
            ob = env.reset()
            net.Flush()
            for j in xrange(max_steps):
                # get next action
                # bias for es-hyperneat
                ob = np.append(ob, [1.])
                net.Input(ob)
                net.Activate()
                o = net.Output()
                # action = np.clip(o, -1, 1)
                action = np.argmax(o)
                ob, reward, done, _ = env.step(action)
                cum_reward += reward
                if done:
                    break
        return cum_reward
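
    # The genome evolved below is the CPPN that ES-HyperNEAT queries to build the
    # substrate network; the winner is returned pickled so the caller can rebuild it.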
    # Create initial genome
    g = NEAT.Genome(0, 5, 0, 2, False,
                    NEAT.ActivationFunction.TANH, NEAT.ActivationFunction.TANH, 0, params)
    pop = NEAT.Population(g, params, True, 1.0, seed)
    current_best = None

    for generation in range(generationSize):
        for i_episode, genome in enumerate(NEAT.GetGenomeList(pop)):
            reward = evaluate(genome)
            if reward == max_reward:
                return pickle.dumps(genome)
            genome.SetFitness(reward)
        print('Generation: {}, max fitness: {}'.format(generation,
              max(x.GetFitness() for x in NEAT.GetGenomeList(pop))))
        current_best = pickle.dumps(pop.GetBestGenome())
        pop.Epoch()
    return current_best


env_name = "CartPole"

if __name__ == '__main__':
    # Test the algorithm multiple times
    for test_case in xrange(0, 1):
        # setup logger, environment and monitor
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        env = gym.make("%s-v1" % env_name)
        outdir = "/tmp/neat-%s-results-%d" % (env_name, test_case)
        env.monitor.start(outdir, force=True)
        # Train network
        learned = trainNetwork(env, test_case)
        # Test trained network on 1000 episodes
        learned_genome = pickle.loads(learned)
        net = NEAT.NeuralNetwork()
        learned_genome.BuildESHyperNEATPhenotype(net, substrate, params)
        # learned_genome.BuildPhenotype(net)
        episode_count = 1000
        max_steps = 475
        for i in xrange(episode_count):
            ob = env.reset()
            net.Flush()
            for j in xrange(max_steps):
                # get next action
                # bias for es-hyperneat
                ob = np.append(ob, [1.])
                net.Input(ob)
                net.Activate()
                o = net.Output()
                # action = np.clip(o, -1, 1)
                action = np.argmax(o)
                ob, reward, done, _ = env.step(action)
                if done:
                    break
        # Dump result info to disk
        env.monitor.close()
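        # Optional (untested sketch, hypothetical path): persist the winning genome so
        # it can be reloaded later without retraining, e.g.:
        #   with open("/tmp/neat-%s-best-%d.pickle" % (env_name, test_case), "wb") as f:
        #       f.write(learned)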