# A neural network is trained with ES-HyperNEAT to solve the CartPole balancing task.
# The underlying NEAT algorithm is from the paper "Evolving Neural Networks through
# Augmenting Topologies" (Stanley & Miikkulainen); ES-HyperNEAT extends it with an
# evolvable-substrate HyperNEAT encoding (Risi & Stanley).
# This gist uses MultiNEAT (http://multineat.com/)
import logging
import pickle

import gym
import numpy as np

import MultiNEAT as NEAT
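# Note: this script targets Python 2 (xrange) and the 2016-era OpenAI Gym API
# (env.monitor, 4-tuple env.step() return); later gym releases changed or removed both.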
# NEAT setup
params = NEAT.Parameters()
params.PopulationSize = 20
params.DynamicCompatibility = True
params.CompatTreshold = 2.0
params.YoungAgeTreshold = 15
params.SpeciesMaxStagnation = 100
params.OldAgeTreshold = 35
params.MinSpecies = 5
params.MaxSpecies = 10
params.RouletteWheelSelection = False
params.MutateRemLinkProb = 0.02
params.RecurrentProb = 0
params.OverallMutationRate = 0.15
params.MutateAddLinkProb = 0.08
params.MutateAddNeuronProb = 0.01
params.MutateWeightsProb = 0.90
params.MaxWeight = 8.0
params.WeightMutationMaxPower = 0.2
params.WeightReplacementMaxPower = 1.0
params.MutateActivationAProb = 0.0
params.ActivationAMutationMaxPower = 0.5
params.MinActivationA = 0.05
params.MaxActivationA = 6.0
params.MutateNeuronActivationTypeProb = 0.03
params.ActivationFunction_SignedSigmoid_Prob = 0.0
params.ActivationFunction_UnsignedSigmoid_Prob = 0.0
params.ActivationFunction_Tanh_Prob = 1.0
params.ActivationFunction_TanhCubic_Prob = 0.0
params.ActivationFunction_SignedStep_Prob = 1.0
params.ActivationFunction_UnsignedStep_Prob = 0.0
params.ActivationFunction_SignedGauss_Prob = 1.0
params.ActivationFunction_UnsignedGauss_Prob = 0.0
params.ActivationFunction_Abs_Prob = 0.0
params.ActivationFunction_SignedSine_Prob = 1.0
params.ActivationFunction_UnsignedSine_Prob = 0.0
params.ActivationFunction_Linear_Prob = 1.0
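# ES-HyperNEAT-specific parameters: the division/variance/band thresholds and depth
# limits below control the quadtree search that decides where hidden neurons are
# placed on the substrate and how densely it is connected.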
params.DivisionThreshold = 0.5
params.VarianceThreshold = 0.03
params.BandThreshold = 0.3
params.InitialDepth = 2
params.MaxDepth = 3
params.IterationLevel = 1
params.Leo = False
params.GeometrySeed = False
params.LeoSeed = False
params.LeoThreshold = 0.3
params.CPPN_Bias = -1.0
params.Qtree_X = 0.0
params.Qtree_Y = 0.0
params.Width = 1.
params.Height = 1.
params.Elitism = 0.1

rng = NEAT.RNG()
rng.TimeSeed()
input_coords = []
for i in range(0, 4):
    input_coords.append((-1. + (2. * i / 3.), -1., 0.))
print(input_coords)
# for i in range(0, 10):
#     input_coords.append((-1. + (2. * i / 9), -0.5, 0))
# append bias input
input_coords.append((0., -0.8, 0.))
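# ES-HyperNEAT substrate: the four CartPole observation inputs plus a bias node along
# the bottom of the unit square, no predefined hidden nodes (hidden neurons are
# discovered by the evolvable-substrate search), and two output nodes (one per
# discrete action) along the top.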
substrate = NEAT.Substrate(input_coords,
                           [],
                           [(-1., 1., 0.), (1., 1., 0.)])
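# Substrate connectivity flags: recurrent/looped links are permitted for hidden and
# output neurons; direct output-to-output links are not.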
substrate.m_allow_input_hidden_links = True
substrate.m_allow_input_output_links = True
substrate.m_allow_hidden_hidden_links = True
substrate.m_allow_hidden_output_links = True
substrate.m_allow_output_hidden_links = True
substrate.m_allow_output_output_links = False
substrate.m_allow_looped_hidden_links = True
substrate.m_allow_looped_output_links = True
substrate.m_hidden_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID
substrate.m_output_nodes_activation = NEAT.ActivationFunction.UNSIGNED_SIGMOID
substrate.m_with_distance = False
substrate.m_max_weight_and_bias = 8.0


def trainNetwork(env, seed):
    # Training parameters
    generationSize = 30
    episode_count = 10
    max_steps = 475  # matches the CartPole-v1 reward threshold
    # Max reward for environments that give a reward of 1 for each successful step (e.g. CartPole)
    max_reward = episode_count * max_steps
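    # Fitness of a genome = total reward accumulated over `episode_count` episodes,
    # each capped at `max_steps`, using the substrate network decoded from the genome.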
    def evaluate(genome):
        net = NEAT.NeuralNetwork()
        genome.BuildESHyperNEATPhenotype(net, substrate, params)
        # genome.BuildPhenotype(net)
        cum_reward = 0
        for i in xrange(episode_count):
            ob = env.reset()
            net.Flush()
            for j in xrange(max_steps):
                # get next action
                # bias for es-hyperneat
                ob = np.append(ob, [1.])
                net.Input(ob)
                net.Activate()
                o = net.Output()
                # action = np.clip(o, -1, 1)
                action = np.argmax(o)
                ob, reward, done, _ = env.step(action)
                cum_reward += reward
                if done:
                    break
        return cum_reward
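
    # The genome evolved below is the CPPN that ES-HyperNEAT queries to build the
    # substrate network; the winner is returned pickled so the caller can rebuild it.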
    # Create initial genome
    g = NEAT.Genome(0, 5, 0, 2, False,
                    NEAT.ActivationFunction.TANH, NEAT.ActivationFunction.TANH, 0, params)
    pop = NEAT.Population(g, params, True, 1.0, seed)
    current_best = None

    for generation in range(generationSize):
        for i_episode, genome in enumerate(NEAT.GetGenomeList(pop)):
            reward = evaluate(genome)
            if reward == max_reward:
                return pickle.dumps(genome)
            genome.SetFitness(reward)
        print('Generation: {}, max fitness: {}'.format(generation,
              max(x.GetFitness() for x in NEAT.GetGenomeList(pop))))
        current_best = pickle.dumps(pop.GetBestGenome())
        pop.Epoch()
    return current_best


env_name = "CartPole"

if __name__ == '__main__':
    # Test the algorithm multiple times
    for test_case in xrange(0, 1):
        # setup logger, environment and monitor
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        env = gym.make("%s-v1" % env_name)
        outdir = "/tmp/neat-%s-results-%d" % (env_name, test_case)
        env.monitor.start(outdir, force=True)
        # Train network
        learned = trainNetwork(env, test_case)
        # Test trained network on 1000 episodes
        learned_genome = pickle.loads(learned)
        net = NEAT.NeuralNetwork()
        learned_genome.BuildESHyperNEATPhenotype(net, substrate, params)
        # learned_genome.BuildPhenotype(net)
        episode_count = 1000
        max_steps = 475
        for i in xrange(episode_count):
            ob = env.reset()
            net.Flush()
            for j in xrange(max_steps):
                # get next action
                # bias for es-hyperneat
                ob = np.append(ob, [1.])
                net.Input(ob)
                net.Activate()
                o = net.Output()
                # action = np.clip(o, -1, 1)
                action = np.argmax(o)
                ob, reward, done, _ = env.step(action)
                if done:
                    break
        # Dump result info to disk
        env.monitor.close()
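        # Optional (untested sketch, hypothetical path): persist the winning genome so
        # it can be reloaded later without retraining, e.g.:
        #   with open("/tmp/neat-%s-best-%d.pickle" % (env_name, test_case), "wb") as f:
        #       f.write(learned)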