stefanopalmieri · October 14, 2016 01:34 · MadcowD · Oct 20, 2016 · stefanopalmieri · Oct 24, 2016
diff --git a/lander.py b/lander.py
 # A solution organism was found in the 78th generation.
 # Training used 200 genomes * 5 episodes * 78 generations = 78000 trials in total.


 from __future__ import print_function

 import gym
 import numpy as np
 import itertools
 import os

 from neat import nn, population, statistics

 # Print arrays in full: an infinite threshold means numpy never summarizes
 # them with "...".
 np.set_printoptions(threshold=np.inf)
 # Single Gym environment shared by eval_fitness and the final evaluation loop.
 env = gym.make('LunarLanderContinuous-v2')

 # run through the population


 def eval_fitness(genomes):
    """Score every genome by flying it in the module-level ``env``.

    Each genome is turned into a network and flown for five episodes.  Its
    fitness is the *lowest* episode return of the five, so a genome only
    scores well when it performs consistently.  The result is stored on the
    genome's ``fitness`` attribute and printed.

    Args:
        genomes: iterable of NEAT genomes; each one is updated in place.

    Returns:
        None.
    """
    for g in genomes:
        net = nn.create_feed_forward_phenotype(g)

        # Start from +inf rather than a magic number such as 300, so the
        # minimum below is never silently capped by the sentinel.
        worst_return = float('inf')

        for _ in range(5):
            observation = env.reset()
            episode_return = 0
            reward = 0  # no reward has been received before the first step
            frames = 0

            while True:
                # The network input is the observation with the reward of the
                # previous step appended as one extra value.
                inputs = np.append(observation, np.array(reward))
                # Clip the network output to [-1, 1] before stepping.
                output = np.clip(net.serial_activate(inputs), -1, 1)
                observation, reward, done, _ = env.step(np.array(output))

                episode_return += reward
                frames += 1
                # Stop when the environment reports the episode is over or
                # the frame budget has been exceeded.
                if done or frames > 1000:
                    break

            worst_return = min(worst_return, episode_return)

        g.fitness = worst_return
        print(g.fitness)

 # The NEAT configuration file is expected to sit next to this script.
 local_dir = os.path.dirname(__file__)
 config_path = os.path.join(local_dir, 'lander_config')

 # Evolve a population scored by eval_fitness (1000 is the second argument
 # handed to Population.run), then keep only the best genome it recorded.
 pop = population.Population(config_path)
 pop.run(eval_fitness, 1000)
 winner = pop.statistics.best_genome()
 del pop

 winningnet = nn.create_feed_forward_phenotype(winner)

 env.monitor.start('walker-experiment/', force=True)

 # Replay the winner until it scores at least 200 in 100 consecutive episodes.
 streak = 0
 while streak < 100:
    fitness = 0
    frames = 0
    reward = 0
    observation = env.reset()
    env.render()

    # Always take at least one step; stop once the environment reports the
    # episode is done or more than 1000 frames have been played.
    done = False
    while not (done or frames > 1000):
        # Network input: the observation with the previous reward appended.
        inputs = np.append(observation, np.array(reward))
        output = np.clip(winningnet.serial_activate(inputs), -1, 1)
        observation, reward, done, info = env.step(np.array(output))

        fitness += reward
        env.render()
        frames += 1

    # Report the episode first (the streak shown is the value *before* this
    # episode is counted), then extend or reset the streak.
    print(fitness)
    print('streak: ', streak)
    streak = streak + 1 if fitness >= 200 else 0

 print("completed!")
 env.monitor.close()
diff --git a/lander_config b/lander_config
 #--- NEAT parameters for LunarLanderContinuous-v2 ---#

 # The `Types` section specifies which classes should be used for various
 # tasks in the NEAT algorithm.  If you use a non-default class here, you
 # must register it with your Config instance before loading the config file.
 [Types]
 stagnation_type      = DefaultStagnation
 reproduction_type    = DefaultReproduction

 [phenotype]
 input_nodes          = 9
 hidden_nodes         = 0
 output_nodes         = 2
 initial_connection   = unconnected
 max_weight           = 10
 min_weight           = -10
 feedforward          = 0
 activation_functions = tanh sigmoid relu identity
 weight_stdev         = 3

 [genetic]
 pop_size                = 200
 max_fitness_threshold   = 210
 prob_add_conn           = 0.3
 prob_add_node           = 0.1
 prob_delete_conn        = 0.05
 prob_delete_node        = 0.03
 prob_mutate_bias        = 0.00109
 bias_mutation_power     = 0.01
 prob_mutate_response    = 0.01
 response_mutation_power = 0.01
 prob_mutate_weight      = 0.3
 prob_replace_weight     = 0.03
 weight_mutation_power   = 0.1
 prob_mutate_activation  = 0.01
 prob_toggle_link        = 0.0138
 reset_on_extinction     = 1

 [genotype compatibility]
 compatibility_threshold = 3
 excess_coefficient      = 1.0
 disjoint_coefficient    = 1.0
 weight_coefficient      = 0.4

 [DefaultStagnation]
 species_fitness_func = mean
 max_stagnation       = 5

 [DefaultReproduction]
 elitism              = 3
 survival_threshold   = 0.2
	# A solution organism was found in the 78th generation.
	# Training used 200 genomes * 5 episodes * 78 generations = 78000 trials in total.


	from __future__ import print_function

	import gym
	import numpy as np
	import itertools
	import os

	from neat import nn, population, statistics

	# Print arrays in full: an infinite threshold means numpy never summarizes
	# them with "...".
	np.set_printoptions(threshold=np.inf)
	# Single Gym environment shared by eval_fitness and the final evaluation loop.
	env = gym.make('LunarLanderContinuous-v2')

	# run through the population


	def eval_fitness(genomes):
	    """Score every genome by flying it in the module-level ``env``.

	    Each genome is turned into a network and flown for five episodes.  Its
	    fitness is the *lowest* episode return of the five, so a genome only
	    scores well when it performs consistently.  The result is stored on the
	    genome's ``fitness`` attribute and printed.

	    Args:
	        genomes: iterable of NEAT genomes; each one is updated in place.

	    Returns:
	        None.
	    """
	    for g in genomes:
	        net = nn.create_feed_forward_phenotype(g)

	        # Start from +inf rather than a magic number such as 300, so the
	        # minimum below is never silently capped by the sentinel.
	        worst_return = float('inf')

	        for _ in range(5):
	            observation = env.reset()
	            episode_return = 0
	            reward = 0  # no reward has been received before the first step
	            frames = 0

	            while True:
	                # The network input is the observation with the reward of
	                # the previous step appended as one extra value.
	                inputs = np.append(observation, np.array(reward))
	                # Clip the network output to [-1, 1] before stepping.
	                output = np.clip(net.serial_activate(inputs), -1, 1)
	                observation, reward, done, _ = env.step(np.array(output))

	                episode_return += reward
	                frames += 1
	                # Stop when the environment reports the episode is over or
	                # the frame budget has been exceeded.
	                if done or frames > 1000:
	                    break

	            worst_return = min(worst_return, episode_return)

	        g.fitness = worst_return
	        print(g.fitness)

	# The NEAT configuration file is expected to sit next to this script.
	local_dir = os.path.dirname(__file__)
	config_path = os.path.join(local_dir, 'lander_config')

	# Evolve a population scored by eval_fitness (1000 is the second argument
	# handed to Population.run), then keep only the best genome it recorded.
	pop = population.Population(config_path)
	pop.run(eval_fitness, 1000)
	winner = pop.statistics.best_genome()
	del pop

	winningnet = nn.create_feed_forward_phenotype(winner)

	env.monitor.start('walker-experiment/', force=True)

	# Replay the winner until it scores at least 200 in 100 consecutive episodes.
	streak = 0
	while streak < 100:
	    fitness = 0
	    frames = 0
	    reward = 0
	    observation = env.reset()
	    env.render()

	    # Always take at least one step; stop once the environment reports the
	    # episode is done or more than 1000 frames have been played.
	    done = False
	    while not (done or frames > 1000):
	        # Network input: the observation with the previous reward appended.
	        inputs = np.append(observation, np.array(reward))
	        output = np.clip(winningnet.serial_activate(inputs), -1, 1)
	        observation, reward, done, info = env.step(np.array(output))

	        fitness += reward
	        env.render()
	        frames += 1

	    # Report the episode first (the streak shown is the value *before* this
	    # episode is counted), then extend or reset the streak.
	    print(fitness)
	    print('streak: ', streak)
	    streak = streak + 1 if fitness >= 200 else 0

	print("completed!")
	env.monitor.close()
	#--- NEAT parameters for LunarLanderContinuous-v2 ---#

	# The `Types` section specifies which classes should be used for various
	# tasks in the NEAT algorithm. If you use a non-default class here, you
	# must register it with your Config instance before loading the config file.
	[Types]
	stagnation_type = DefaultStagnation
	reproduction_type = DefaultReproduction

	[phenotype]
	input_nodes = 9
	hidden_nodes = 0
	output_nodes = 2
	initial_connection = unconnected
	max_weight = 10
	min_weight = -10
	feedforward = 0
	activation_functions = tanh sigmoid relu identity
	weight_stdev = 3

	[genetic]
	pop_size = 200
	max_fitness_threshold = 210
	prob_add_conn = 0.3
	prob_add_node = 0.1
	prob_delete_conn = 0.05
	prob_delete_node = 0.03
	prob_mutate_bias = 0.00109
	bias_mutation_power = 0.01
	prob_mutate_response = 0.01
	response_mutation_power = 0.01
	prob_mutate_weight = 0.3
	prob_replace_weight = 0.03
	weight_mutation_power = 0.1
	prob_mutate_activation = 0.01
	prob_toggle_link = 0.0138
	reset_on_extinction = 1

	[genotype compatibility]
	compatibility_threshold = 3
	excess_coefficient = 1.0
	disjoint_coefficient = 1.0
	weight_coefficient = 0.4

	[DefaultStagnation]
	species_fitness_func = mean
	max_stagnation = 5

	[DefaultReproduction]
	elitism = 3
	survival_threshold = 0.2