stefanopalmieri · October 26, 2016 22:47 · filipenovais · Aug 29, 2019
diff --git a/bipedal-es-hyperneat-0.py b/bipedal-es-hyperneat-0.py
 # Using ES-HyperNEAT to try to solve the Bipedal walker.
 # This attempt was not successful. Adjustment of hyperparameters is likely needed.

 # A neural network is trained using NeuroEvolution of Augmenting Topologies
 # The idea is from the paper: "Evolving Neural Networks through Augmenting Topologies"
 # This gist is using MultiNEAT (http://multineat.com/)

 import logging
 import numpy as np
 import pickle

 import gym

 import MultiNEAT as NEAT

 # NEAT setup
 def _apply_settings(target, settings):
     """Assign each (attribute name, value) pair to `target`, in the order given."""
     for attribute, value in settings:
         setattr(target, attribute, value)


 params = NEAT.Parameters()
 _apply_settings(params, (
     # Population size, species handling and selection.
     ("PopulationSize", 200),
     ("DynamicCompatibility", True),
     ("CompatTreshold", 2.0),
     ("YoungAgeTreshold", 15),
     ("SpeciesMaxStagnation", 100),
     ("OldAgeTreshold", 35),
     ("MinSpecies", 5),
     ("MaxSpecies", 10),
     ("RouletteWheelSelection", False),

     # Structural and weight mutation settings.
     ("MutateRemLinkProb", 0.02),
     ("RecurrentProb", 0),
     ("OverallMutationRate", 0.15),
     ("MutateAddLinkProb", 0.08),
     ("MutateAddNeuronProb", 0.01),
     ("MutateWeightsProb", 0.90),
     ("MaxWeight", 8.0),
     ("WeightMutationMaxPower", 0.2),
     ("WeightReplacementMaxPower", 1.0),

     # Activation "A" parameter settings (its mutation probability is zero).
     ("MutateActivationAProb", 0.0),
     ("ActivationAMutationMaxPower", 0.5),
     ("MinActivationA", 0.05),
     ("MaxActivationA", 6.0),

     ("MutateNeuronActivationTypeProb", 0.03),

     # Per-activation-function values; only Tanh, SignedStep, SignedGauss,
     # SignedSine and Linear are non-zero.
     ("ActivationFunction_SignedSigmoid_Prob", 0.0),
     ("ActivationFunction_UnsignedSigmoid_Prob", 0.0),
     ("ActivationFunction_Tanh_Prob", 1.0),
     ("ActivationFunction_TanhCubic_Prob", 0.0),
     ("ActivationFunction_SignedStep_Prob", 1.0),
     ("ActivationFunction_UnsignedStep_Prob", 0.0),
     ("ActivationFunction_SignedGauss_Prob", 1.0),
     ("ActivationFunction_UnsignedGauss_Prob", 0.0),
     ("ActivationFunction_Abs_Prob", 0.0),
     ("ActivationFunction_SignedSine_Prob", 1.0),
     ("ActivationFunction_UnsignedSine_Prob", 0.0),
     ("ActivationFunction_Linear_Prob", 1.0),

     # Thresholds, depths, LEO flags and quadtree origin/size; presumably the
     # ES-HyperNEAT-specific settings -- confirm against the MultiNEAT docs.
     ("DivisionThreshold", 0.5),
     ("VarianceThreshold", 0.03),
     ("BandThreshold", 0.3),
     ("InitialDepth", 2),
     ("MaxDepth", 3),
     ("IterationLevel", 1),
     ("Leo", False),
     ("GeometrySeed", False),
     ("LeoSeed", False),
     ("LeoThreshold", 0.3),
     ("CPPN_Bias", -1.0),
     ("Qtree_X", 0.0),
     ("Qtree_Y", 0.0),
     ("Width", 1.),
     ("Height", 1.),
     ("Elitism", 0.1),
 ))

 # Time-seeded random number generator.
 rng = NEAT.RNG()
 rng.TimeSeed()

 # Substrate input layer: 24 nodes laid out on two horizontal rows
 # (14 evenly spaced at y = -1, 10 evenly spaced at y = -0.5).
 # Presumably the rows separate the 14 body-state readings from the 10 lidar
 # readings of BipedalWalker's 24-value observation -- TODO confirm.
 # Named `input_coords` rather than `list` so the builtin is not shadowed.
 input_coords = [(-1. + (2. * i / 13.), -1., 0.) for i in range(14)]
 input_coords += [(-1. + (2. * i / 9.), -0.5, 0.) for i in range(10)]

 # Four output nodes on the y = 1 row; no hidden nodes are listed up front.
 output_coords = [(-1., 1., 0.), (-0.5, 1., 0.), (0.5, 1., 0.), (1., 1., 0.)]

 substrate = NEAT.Substrate(input_coords, [], output_coords)

 # Permitted link types. Each flag is set exactly once to the value that was
 # previously in effect after the original "reset everything, then re-enable"
 # sequence.
 substrate.m_allow_input_hidden_links = True
 substrate.m_allow_input_output_links = False
 substrate.m_allow_hidden_hidden_links = True
 substrate.m_allow_hidden_output_links = True
 substrate.m_allow_output_hidden_links = False
 substrate.m_allow_output_output_links = False
 substrate.m_allow_looped_hidden_links = True
 substrate.m_allow_looped_output_links = False

 substrate.m_hidden_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID
 # The rollout loops clip network outputs to [-1, 1] before passing them to
 # env.step(). An unsigned sigmoid can only emit values in [0, 1], so no
 # action component could ever be negative; the signed variant covers the
 # whole clipped range.
 substrate.m_output_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID

 substrate.m_with_distance = False

 substrate.m_max_weight_and_bias = 8.0


 def trainNetwork(env, seed):
     """Evolve ES-HyperNEAT genomes on `env` and return the best one, pickled.

     Uses the module-level `substrate` and `params` to build each phenotype.

     Args:
         env: Gym-style environment; `reset()` must return an observation and
             `step(action)` an `(observation, reward, done, info)` tuple.
         seed: Seed value passed to `NEAT.Population`.

     Returns:
         A `pickle.dumps` string of a genome: the first genome whose summed
         reward reaches `max_reward`, otherwise the population's best genome
         as recorded in the final generation (before its `Epoch()` call).
     """
     # Training parameters
     generationSize = 50
     episode_count = 10
     max_steps = 1000
     # Max reward for environments that reward 1 for each successful step
     # (e.g. CartPole-v0).
     # NOTE(review): BipedalWalker presumably does not pay +1 per step, so
     # this early-exit bound is likely never reached here -- confirm.
     max_reward = episode_count * max_steps

     def evaluate(genome):
         """Return the reward summed over `episode_count` episodes."""
         net = NEAT.NeuralNetwork()
         genome.BuildESHyperNEATPhenotype(net, substrate, params)

         cum_reward = 0

         # `range` instead of `xrange`: works on both Python 2 and 3 and
         # matches the loops used elsewhere in this file.
         for _ in range(episode_count):
             ob = env.reset()
             # Clear network state between episodes.
             net.Flush()

             for _ in range(max_steps):
                 # get next action
                 net.Input(ob)
                 net.Activate()
                 action = np.clip(net.Output(), -1, 1)
                 ob, reward, done, _info = env.step(action)
                 cum_reward += reward
                 if done:
                     break

         return cum_reward

     # Create initial genome (24 inputs, 4 outputs)
     g = NEAT.Genome(0, 24, 0, 4, False,
                     NEAT.ActivationFunction.TANH,
                     NEAT.ActivationFunction.TANH, 0, params)
     pop = NEAT.Population(g, params, True, 1.0, seed)

     current_best = None

     for generation in range(generationSize):
         genomes = NEAT.GetGenomeList(pop)
         for genome in genomes:
             reward = evaluate(genome)

             # `>=` rather than `==`: the reward is a float sum, so exact
             # equality with the bound is fragile.
             if reward >= max_reward:
                 return pickle.dumps(genome)

             genome.SetFitness(reward)

         print('Generation: {}, max fitness: {}'.format(
             generation, max(x.GetFitness() for x in genomes)))
         # Snapshot the best genome before Epoch() produces the next generation.
         current_best = pickle.dumps(pop.GetBestGenome())
         pop.Epoch()

     return current_best

 env_name = "BipedalWalker"

 if __name__ == '__main__':
     # Test the algorithm multiple times
     # (`range` instead of `xrange` so the script also runs on Python 3.)
     for test_case in range(0, 1):
         # setup logger, environment and monitor
         logger = logging.getLogger()
         logger.setLevel(logging.INFO)
         env = gym.make("%s-v2" % env_name)
         outdir = "/tmp/neat-%s-results-%d" % (env_name, test_case)
         env.monitor.start(outdir, force=True)

         try:
             # Train network
             learned = trainNetwork(env, test_case)

             # Test trained network on 1000 episodes.
             # NOTE: `learned` was pickled by trainNetwork in this same
             # process; never feed pickle.loads data from an untrusted source.
             learned_genome = pickle.loads(learned)
             net = NEAT.NeuralNetwork()
             learned_genome.BuildESHyperNEATPhenotype(net, substrate, params)

             episode_count = 1000
             max_steps = 1000

             for _ in range(episode_count):
                 ob = env.reset()
                 # Clear network state between episodes.
                 net.Flush()

                 for _ in range(max_steps):
                     # get next action
                     net.Input(ob)
                     net.Activate()
                     action = np.clip(net.Output(), -1, 1)
                     ob, reward, done, _info = env.step(action)
                     if done:
                         break
         finally:
             # Dump result info to disk, even if training or evaluation raised.
             env.monitor.close()
	# Using ES-HyperNEAT to try to solve the Bipedal walker.
	# This attempt was not successful. Adjustment of hyperparameters is likely needed.

	# A neural network is trained using NeuroEvolution of Augmenting Topologies
	# The idea is from the paper: "Evolving Neural Networks through Augmenting Topologies"
	# This gist is using MultiNEAT (http://multineat.com/)

	import logging
	import numpy as np
	import pickle

	import gym

	import MultiNEAT as NEAT

	# NEAT setup
	def _apply_settings(target, settings):
	    """Assign each (attribute name, value) pair to `target`, in the order given."""
	    for attribute, value in settings:
	        setattr(target, attribute, value)


	params = NEAT.Parameters()
	_apply_settings(params, (
	    # Population size, species handling and selection.
	    ("PopulationSize", 200),
	    ("DynamicCompatibility", True),
	    ("CompatTreshold", 2.0),
	    ("YoungAgeTreshold", 15),
	    ("SpeciesMaxStagnation", 100),
	    ("OldAgeTreshold", 35),
	    ("MinSpecies", 5),
	    ("MaxSpecies", 10),
	    ("RouletteWheelSelection", False),

	    # Structural and weight mutation settings.
	    ("MutateRemLinkProb", 0.02),
	    ("RecurrentProb", 0),
	    ("OverallMutationRate", 0.15),
	    ("MutateAddLinkProb", 0.08),
	    ("MutateAddNeuronProb", 0.01),
	    ("MutateWeightsProb", 0.90),
	    ("MaxWeight", 8.0),
	    ("WeightMutationMaxPower", 0.2),
	    ("WeightReplacementMaxPower", 1.0),

	    # Activation "A" parameter settings (its mutation probability is zero).
	    ("MutateActivationAProb", 0.0),
	    ("ActivationAMutationMaxPower", 0.5),
	    ("MinActivationA", 0.05),
	    ("MaxActivationA", 6.0),

	    ("MutateNeuronActivationTypeProb", 0.03),

	    # Per-activation-function values; only Tanh, SignedStep, SignedGauss,
	    # SignedSine and Linear are non-zero.
	    ("ActivationFunction_SignedSigmoid_Prob", 0.0),
	    ("ActivationFunction_UnsignedSigmoid_Prob", 0.0),
	    ("ActivationFunction_Tanh_Prob", 1.0),
	    ("ActivationFunction_TanhCubic_Prob", 0.0),
	    ("ActivationFunction_SignedStep_Prob", 1.0),
	    ("ActivationFunction_UnsignedStep_Prob", 0.0),
	    ("ActivationFunction_SignedGauss_Prob", 1.0),
	    ("ActivationFunction_UnsignedGauss_Prob", 0.0),
	    ("ActivationFunction_Abs_Prob", 0.0),
	    ("ActivationFunction_SignedSine_Prob", 1.0),
	    ("ActivationFunction_UnsignedSine_Prob", 0.0),
	    ("ActivationFunction_Linear_Prob", 1.0),

	    # Thresholds, depths, LEO flags and quadtree origin/size; presumably the
	    # ES-HyperNEAT-specific settings -- confirm against the MultiNEAT docs.
	    ("DivisionThreshold", 0.5),
	    ("VarianceThreshold", 0.03),
	    ("BandThreshold", 0.3),
	    ("InitialDepth", 2),
	    ("MaxDepth", 3),
	    ("IterationLevel", 1),
	    ("Leo", False),
	    ("GeometrySeed", False),
	    ("LeoSeed", False),
	    ("LeoThreshold", 0.3),
	    ("CPPN_Bias", -1.0),
	    ("Qtree_X", 0.0),
	    ("Qtree_Y", 0.0),
	    ("Width", 1.),
	    ("Height", 1.),
	    ("Elitism", 0.1),
	))

	# Time-seeded random number generator.
	rng = NEAT.RNG()
	rng.TimeSeed()

	# Substrate input layer: 24 nodes laid out on two horizontal rows
	# (14 evenly spaced at y = -1, 10 evenly spaced at y = -0.5).
	# Presumably the rows separate the 14 body-state readings from the 10 lidar
	# readings of BipedalWalker's 24-value observation -- TODO confirm.
	# Named `input_coords` rather than `list` so the builtin is not shadowed.
	input_coords = [(-1. + (2. * i / 13.), -1., 0.) for i in range(14)]
	input_coords += [(-1. + (2. * i / 9.), -0.5, 0.) for i in range(10)]

	# Four output nodes on the y = 1 row; no hidden nodes are listed up front.
	output_coords = [(-1., 1., 0.), (-0.5, 1., 0.), (0.5, 1., 0.), (1., 1., 0.)]

	substrate = NEAT.Substrate(input_coords, [], output_coords)

	# Permitted link types. Each flag is set exactly once to the value that was
	# previously in effect after the original "reset everything, then re-enable"
	# sequence.
	substrate.m_allow_input_hidden_links = True
	substrate.m_allow_input_output_links = False
	substrate.m_allow_hidden_hidden_links = True
	substrate.m_allow_hidden_output_links = True
	substrate.m_allow_output_hidden_links = False
	substrate.m_allow_output_output_links = False
	substrate.m_allow_looped_hidden_links = True
	substrate.m_allow_looped_output_links = False

	substrate.m_hidden_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID
	# The rollout loops clip network outputs to [-1, 1] before passing them to
	# env.step(). An unsigned sigmoid can only emit values in [0, 1], so no
	# action component could ever be negative; the signed variant covers the
	# whole clipped range.
	substrate.m_output_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID

	substrate.m_with_distance = False

	substrate.m_max_weight_and_bias = 8.0


	def trainNetwork(env, seed):
	    """Evolve ES-HyperNEAT genomes on `env` and return the best one, pickled.

	    Uses the module-level `substrate` and `params` to build each phenotype.

	    Args:
	        env: Gym-style environment; `reset()` must return an observation and
	            `step(action)` an `(observation, reward, done, info)` tuple.
	        seed: Seed value passed to `NEAT.Population`.

	    Returns:
	        A `pickle.dumps` string of a genome: the first genome whose summed
	        reward reaches `max_reward`, otherwise the population's best genome
	        as recorded in the final generation (before its `Epoch()` call).
	    """
	    # Training parameters
	    generationSize = 50
	    episode_count = 10
	    max_steps = 1000
	    # Max reward for environments that reward 1 for each successful step
	    # (e.g. CartPole-v0).
	    # NOTE(review): BipedalWalker presumably does not pay +1 per step, so
	    # this early-exit bound is likely never reached here -- confirm.
	    max_reward = episode_count * max_steps

	    def evaluate(genome):
	        """Return the reward summed over `episode_count` episodes."""
	        net = NEAT.NeuralNetwork()
	        genome.BuildESHyperNEATPhenotype(net, substrate, params)

	        cum_reward = 0

	        # `range` instead of `xrange`: works on both Python 2 and 3 and
	        # matches the loops used elsewhere in this file.
	        for _ in range(episode_count):
	            ob = env.reset()
	            # Clear network state between episodes.
	            net.Flush()

	            for _ in range(max_steps):
	                # get next action
	                net.Input(ob)
	                net.Activate()
	                action = np.clip(net.Output(), -1, 1)
	                ob, reward, done, _info = env.step(action)
	                cum_reward += reward
	                if done:
	                    break

	        return cum_reward

	    # Create initial genome (24 inputs, 4 outputs)
	    g = NEAT.Genome(0, 24, 0, 4, False,
	                    NEAT.ActivationFunction.TANH,
	                    NEAT.ActivationFunction.TANH, 0, params)
	    pop = NEAT.Population(g, params, True, 1.0, seed)

	    current_best = None

	    for generation in range(generationSize):
	        genomes = NEAT.GetGenomeList(pop)
	        for genome in genomes:
	            reward = evaluate(genome)

	            # `>=` rather than `==`: the reward is a float sum, so exact
	            # equality with the bound is fragile.
	            if reward >= max_reward:
	                return pickle.dumps(genome)

	            genome.SetFitness(reward)

	        print('Generation: {}, max fitness: {}'.format(
	            generation, max(x.GetFitness() for x in genomes)))
	        # Snapshot the best genome before Epoch() produces the next generation.
	        current_best = pickle.dumps(pop.GetBestGenome())
	        pop.Epoch()

	    return current_best

	env_name = "BipedalWalker"

	if __name__ == '__main__':
	    # Test the algorithm multiple times
	    # (`range` instead of `xrange` so the script also runs on Python 3.)
	    for test_case in range(0, 1):
	        # setup logger, environment and monitor
	        logger = logging.getLogger()
	        logger.setLevel(logging.INFO)
	        env = gym.make("%s-v2" % env_name)
	        outdir = "/tmp/neat-%s-results-%d" % (env_name, test_case)
	        env.monitor.start(outdir, force=True)

	        try:
	            # Train network
	            learned = trainNetwork(env, test_case)

	            # Test trained network on 1000 episodes.
	            # NOTE: `learned` was pickled by trainNetwork in this same
	            # process; never feed pickle.loads data from an untrusted source.
	            learned_genome = pickle.loads(learned)
	            net = NEAT.NeuralNetwork()
	            learned_genome.BuildESHyperNEATPhenotype(net, substrate, params)

	            episode_count = 1000
	            max_steps = 1000

	            for _ in range(episode_count):
	                ob = env.reset()
	                # Clear network state between episodes.
	                net.Flush()

	                for _ in range(max_steps):
	                    # get next action
	                    net.Input(ob)
	                    net.Activate()
	                    action = np.clip(net.Output(), -1, 1)
	                    ob, reward, done, _info = env.step(action)
	                    if done:
	                        break
	        finally:
	            # Dump result info to disk, even if training or evaluation raised.
	            env.monitor.close()