Last active
August 24, 2022 04:34
-
-
Save CodeReclaimers/ba4f28ccf4ec982947ebe7bfd08a00c0 to your computer and use it in GitHub Desktop.
OpenAI Gym LunarLander-v2 writeup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# neat-python configuration for the LunarLander-v2 environment on OpenAI Gym | |
# Sample run here: https://gym.openai.com/evaluations/eval_FbKq5MxAS9GlvB7W6ioJkg | |
# NOTE: This was run using revision 1186029827c156e0ff6f9b36d6847eb2aa56757a of CodeReclaimers/neat-python, not a release on PyPI. | |
[NEAT] | |
pop_size = 150 | |
# Note: the fitness threshold is deliberately set higher than any reachable
# fitness; the evolve script decides when to stop based on simulation performance.
fitness_criterion = max | |
fitness_threshold = 1000.0 | |
reset_on_extinction = 0 | |
[DefaultGenome] | |
num_inputs = 8 | |
num_hidden = 0 | |
num_outputs = 4 | |
initial_connection = full | |
feed_forward = True | |
compatibility_disjoint_coefficient = 1.0 | |
compatibility_weight_coefficient = 1.0 | |
conn_add_prob = 0.15 | |
conn_delete_prob = 0.1 | |
node_add_prob = 0.15 | |
node_delete_prob = 0.1 | |
activation_default = clamped | |
activation_options = clamped | |
activation_mutate_rate = 0.0 | |
aggregation_default = sum | |
aggregation_options = sum | |
aggregation_mutate_rate = 0.0 | |
bias_init_mean = 0.0 | |
bias_init_stdev = 1.0 | |
bias_replace_rate = 0.02 | |
bias_mutate_rate = 0.8 | |
bias_mutate_power = 0.4 | |
bias_max_value = 30.0 | |
bias_min_value = -30.0 | |
response_init_mean = 1.0 | |
response_init_stdev = 0.0 | |
response_replace_rate = 0.0 | |
response_mutate_rate = 0.1 | |
response_mutate_power = 0.01 | |
response_max_value = 30.0 | |
response_min_value = -30.0 | |
weight_max_value = 30 | |
weight_min_value = -30 | |
weight_init_mean = 0.0 | |
weight_init_stdev = 1.0 | |
weight_mutate_rate = 0.8 | |
weight_replace_rate = 0.02 | |
weight_mutate_power = 0.4 | |
enabled_default = True | |
enabled_mutate_rate = 0.01 | |
[DefaultSpeciesSet] | |
compatibility_threshold = 3.0 | |
[DefaultStagnation] | |
species_fitness_func = mean | |
max_stagnation = 15 | |
species_elitism = 4 | |
[DefaultReproduction] | |
elitism = 2 | |
survival_threshold = 0.2 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Evolve a control/reward estimation network for the OpenAI Gym | |
# LunarLander-v2 environment (https://gym.openai.com/envs/LunarLander-v2). | |
# Sample run here: https://gym.openai.com/evaluations/eval_FbKq5MxAS9GlvB7W6ioJkg | |
# NOTE: This was run using revision 1186029827c156e0ff6f9b36d6847eb2aa56757a of CodeReclaimers/neat-python, not a release on PyPI. | |
from __future__ import print_function | |
import gym | |
import gym.wrappers | |
import matplotlib.pyplot as plt | |
import multiprocessing | |
import neat | |
import numpy as np | |
import os | |
import pickle | |
import random | |
import time | |
import visualize | |
# Create the simulation environment once at import time and report its spaces.
env = gym.make('LunarLander-v2')

print("action space: {0!r}".format(env.action_space))
print("observation space: {0!r}".format(env.observation_space))

# Cap the episode length to keep training time down: 400 steps is more than
# enough to land with a winning score.  The limit is printed before and after
# the override so the change is visible in the log.
_STEP_LIMIT_TAG = 'wrapper_config.TimeLimit.max_episode_steps'
print(env.spec.tags.get(_STEP_LIMIT_TAG))
env.spec.tags[_STEP_LIMIT_TAG] = 400
print(env.spec.tags.get(_STEP_LIMIT_TAG))

# Record every run into the 'results' directory.
# NOTE(review): force=True presumably allows overwriting earlier results -- confirm.
env = gym.wrappers.Monitor(env, 'results', force=True)

# Discount factor applied per time step when accumulating future rewards.
discounted_reward = 0.9

# Bounds used to rescale discounted rewards linearly onto [-1, 1].
min_reward = -200
max_reward = 200

# One (min, mean, max) episode-score tuple is appended per evaluated generation.
score_range = []
def compute_fitness(net, discounted_rewards, episodes):
    """Return the squared reward-estimation errors of ``net`` over ``episodes``.

    ``episodes`` is a sequence of episodes, each a sequence of
    ``(step, observation, action, reward)`` tuples; ``discounted_rewards``
    holds, in the same order, one sequence of target values per episode.
    For every step the network is activated on the observation and the output
    at index ``action`` is compared with the matching target.

    Returns a flat list of floats, one squared error per step, in episode
    order.  Pairing uses ``zip``, so surplus items on either side are ignored.
    """
    squared_errors = []
    for targets, steps in zip(discounted_rewards, episodes):
        squared_errors.extend(
            float((net.activate(obs)[act] - target) ** 2)
            for (_step, obs, act, _reward), target in zip(steps, targets)
        )
    return squared_errors
class PooledErrorCompute(object):
    """Fitness evaluator combining episode score with reward-estimation accuracy.

    Each genome is simulated for one episode in the module-level ``env``; its
    fitness starts as the episode's total reward and is then reduced by
    150 times the mean squared error of its network's reward estimates on a
    random sample of episodes.  The error computation is farmed out to a
    ``multiprocessing`` pool.
    """

    def __init__(self):
        # Worker pool used to run compute_fitness for every network in parallel.
        self.pool = multiprocessing.Pool()

    @staticmethod
    def _discounted_returns(rewards, gamma):
        """Return the discounted sum of future rewards for every time step.

        Element ``t`` of the result equals
        ``sum(gamma ** (k - t) * rewards[k] for k in range(t, len(rewards)))``.
        Computed with a single backward pass instead of an N x N discount
        matrix, so cost is linear in the episode length.
        """
        rewards = np.asarray(rewards, dtype=float)
        returns = np.zeros(len(rewards))
        running = 0.0
        for t in range(len(rewards) - 1, -1, -1):
            running = rewards[t] + gamma * running
            returns[t] = running
        return returns

    def evaluate_genomes(self, genomes, config):
        """Simulate every genome and assign its composite fitness in place.

        ``genomes`` is an iterable of ``(genome_id, genome)`` pairs and
        ``config`` is the neat configuration used to build the networks.
        Timing information and the range of discounted rewards are printed,
        and one ``(min, mean, max)`` score tuple is appended to the
        module-level ``score_range``.
        """
        t0 = time.time()
        nets = [(g, neat.nn.FeedForwardNetwork.create(g, config)) for gid, g in genomes]

        print("network creation time {0}".format(time.time() - t0))
        t0 = time.time()

        # Run one episode per genome, always taking the action with the largest output.
        episodes = []
        for genome, net in nets:
            observation = env.reset()
            episode_data = []
            total_score = 0.0
            j = 0
            while True:
                action = np.argmax(net.activate(observation))
                observation, reward, done, info = env.step(action)
                total_score += reward
                # NOTE(review): the stored observation is the one returned by
                # this step (the state after `action`) -- confirm this pairing
                # is what the reward estimator is meant to learn from.
                episode_data.append((j, observation, action, reward))
                if done:
                    break
                j += 1

            episodes.append((total_score, episode_data))
            genome.fitness = total_score

        print("simulation run time {0}".format(time.time() - t0))
        t0 = time.time()

        scores = [s for s, e in episodes]
        score_range.append((min(scores), np.mean(scores), max(scores)))

        # Compute discounted rewards, one array per episode (same order as `episodes`).
        discounted_rewards = [
            self._discounted_returns([step[3] for step in episode], discounted_reward)
            for score, episode in episodes
        ]

        print(min(map(np.min, discounted_rewards)), max(map(np.max, discounted_rewards)))

        # Normalize rewards: map [min_reward, max_reward] linearly onto [-1, 1].
        discounted_rewards = [
            2 * (dr - min_reward) / (max_reward - min_reward) - 1.0
            for dr in discounted_rewards
        ]

        print(min(map(np.min, discounted_rewards)), max(map(np.max, discounted_rewards)))

        print("discounted & normalized reward compute time {0}".format(time.time() - t0))
        t0 = time.time()

        # Randomly choose (with replacement) a subset of episodes for evaluating
        # the genomes' reward estimates.  Indices are sampled so that each
        # chosen episode stays paired with its own discounted rewards.
        picks = [random.randrange(len(episodes)) for _ in range(10)]
        comparison_episodes = [episodes[i][1] for i in picks]
        comparison_rewards = [discounted_rewards[i] for i in picks]

        jobs = [
            self.pool.apply_async(compute_fitness, (net, comparison_rewards, comparison_episodes))
            for genome, net in nets
        ]

        # Assign a composite fitness to each genome; genomes can make progress either
        # by improving their total reward or by making more accurate reward estimates.
        for job, (genome, net) in zip(jobs, nets):
            reward_error = job.get(timeout=None)
            genome.fitness -= 150 * np.mean(reward_error)

        print("final fitness compute time {0}\n".format(time.time() - t0))
def run():
    """Evolve networks until an ensemble of the best genomes solves the task.

    The neat configuration is read from a file named ``config`` located next
    to this script.  The population is advanced one generation at a time;
    after each generation the fitness and score plots are refreshed and the
    five best unique genomes seen so far are run together for up to 100
    episodes.  The evaluation stops early as soon as the running average
    score falls below 200; if all 100 episodes complete, the genomes are
    pickled, their networks are drawn, and the loop ends.  Ctrl-C also ends
    the loop.  The worker pool and the environment are always released.
    """
    # Load the config file, which is assumed to live in
    # the same directory as this script.
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'config')
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)

    pop = neat.Population(config)
    stats = neat.StatisticsReporter()
    pop.add_reporter(stats)
    pop.add_reporter(neat.StdOutReporter(True))
    # Checkpoint every 10 generations or 900 seconds.
    pop.add_reporter(neat.Checkpointer(10, 900))

    # Run until the winner from a generation is able to solve the environment
    # or the user interrupts the process.
    ec = PooledErrorCompute()
    try:
        while True:
            pop.run(ec.evaluate_genomes, 1)

            visualize.plot_stats(stats, ylog=False, view=False, filename="fitness.svg")

            if score_range:
                # Rows after transposing: min, mean and max score per generation.
                S = np.array(score_range).T
                plt.plot(S[0], 'r-')
                plt.plot(S[1], 'b-')
                plt.plot(S[2], 'g-')
                plt.grid()
                plt.savefig("score-ranges.svg")
                plt.close()

            # Average over the generations actually available (at most five),
            # so the first few generations are not under-reported.
            recent = stats.get_fitness_mean()[-5:]
            mfs = sum(recent) / float(len(recent))
            print("Average mean fitness over last 5 generations: {0}".format(mfs))

            recent = stats.get_fitness_stat(min)[-5:]
            mfs = sum(recent) / float(len(recent))
            print("Average min fitness over last 5 generations: {0}".format(mfs))

            # Use the five best genomes seen so far as an ensemble-ish control system.
            best_genomes = stats.best_unique_genomes(5)
            best_networks = [neat.nn.FeedForwardNetwork.create(g, config)
                             for g in best_genomes]

            solved = True
            best_scores = []
            for k in range(100):
                observation = env.reset()
                score = 0
                while True:
                    # Use the total reward estimates from all five networks to
                    # determine the best action given the current state.
                    total_rewards = np.zeros((4,))
                    for network in best_networks:
                        total_rewards += network.activate(observation)

                    best_action = np.argmax(total_rewards)
                    observation, reward, done, info = env.step(best_action)
                    score += reward
                    env.render()
                    if done:
                        break

                best_scores.append(score)
                avg_score = sum(best_scores) / len(best_scores)
                print(k, score, avg_score)
                if avg_score < 200:
                    solved = False
                    break

            if solved:
                print("Solved.")

                # Save the winners.  Every output file is prefixed with the
                # winner's name so one winner does not overwrite another.
                for n, g in enumerate(best_genomes):
                    name = 'winner-{0}'.format(n)
                    with open(name + '.pickle', 'wb') as f:
                        pickle.dump(g, f)

                    visualize.draw_net(config, g, view=False, filename=name + "-net.gv")
                    visualize.draw_net(config, g, view=False, filename=name + "-net-enabled.gv",
                                       show_disabled=False)
                    visualize.draw_net(config, g, view=False, filename=name + "-net-enabled-pruned.gv",
                                       show_disabled=False, prune_unused=True)

                break
    except KeyboardInterrupt:
        print("User break.")
    finally:
        # Stop the worker processes and close the environment on every exit path.
        ec.pool.terminate()
        ec.pool.join()
        env.close()


if __name__ == '__main__':
    run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import copy | |
import warnings | |
import graphviz | |
import matplotlib.pyplot as plt | |
import numpy as np | |
def plot_stats(statistics, ylog=False, view=False, filename='avg_fitness.svg'):
    """Plot the population's average and best fitness per generation.

    Draws the mean fitness, the mean plus one standard deviation, and the
    best genome's fitness, then saves the figure to ``filename``.  When
    ``ylog`` is true the y axis uses a symlog scale; when ``view`` is true
    the figure is also shown.  The figure is closed before returning.
    """
    if plt is None:
        warnings.warn("This display is not available due to a missing optional dependency (matplotlib)")
        return

    champions = statistics.most_fit_genomes
    generations = range(len(champions))
    mean_curve = np.array(statistics.get_fitness_mean())
    upper_band = mean_curve + np.array(statistics.get_fitness_stdev())
    best_curve = [genome.fitness for genome in champions]

    # Only the upper (+1 sd) band is drawn; plot order fixes the legend order.
    series = (
        (mean_curve, 'b-', "average"),
        (upper_band, 'g-.', "+1 sd"),
        (best_curve, 'r-', "best"),
    )
    for values, line_format, label in series:
        plt.plot(generations, values, line_format, label=label)

    plt.title("Population's average and best fitness")
    plt.xlabel("Generations")
    plt.ylabel("Fitness")
    plt.grid()
    plt.legend(loc="best")
    if ylog:
        plt.gca().set_yscale('symlog')

    plt.savefig(filename)
    if view:
        plt.show()

    plt.close()
def plot_species(statistics, view=False, filename='speciation.svg'):
    """Draw a stacked plot of species sizes per generation.

    The sizes come from ``statistics.get_species_sizes()``.  The figure is
    saved to ``filename``, shown as well when ``view`` is true, and closed
    before returning.
    """
    if plt is None:
        warnings.warn("This display is not available due to a missing optional dependency (matplotlib)")
        return

    sizes_by_generation = statistics.get_species_sizes()
    generations = range(len(sizes_by_generation))
    # Transpose so that each row holds one stacked series across generations.
    per_species = np.transpose(np.array(sizes_by_generation))

    _figure, axes = plt.subplots()
    axes.stackplot(generations, *per_species)

    plt.title("Speciation")
    plt.ylabel("Size per Species")
    plt.xlabel("Generations")

    plt.savefig(filename)
    if view:
        plt.show()

    plt.close()
def draw_net(config, genome, view=False, filename=None, node_names=None, show_disabled=True, prune_unused=False,
             node_colors=None, fmt='svg'):
    """Receive a genome and draw a neural network with arbitrary topology.

    Parameters:
        config: neat configuration; ``config.genome_config.input_keys`` and
            ``output_keys`` determine the input and output nodes.
        genome: genome whose ``nodes`` and ``connections`` are drawn.
        view: passed to ``Digraph.render`` to open the rendered file.
        filename: output path passed to ``Digraph.render``.
        node_names: optional dict mapping node keys to display names.
        show_disabled: when true, disabled connections are drawn dotted;
            when false they are omitted.
        prune_unused: when true, only nodes from which an output can be
            reached through the drawn connections are kept, together with
            the connections between kept nodes.
        node_colors: optional dict mapping node keys to fill colors.
        fmt: output format handed to ``graphviz.Digraph``.

    Returns the ``graphviz.Digraph``, or ``None`` (after a warning) when
    graphviz is unavailable.
    """
    if graphviz is None:
        warnings.warn("This display is not available due to a missing optional dependency (graphviz)")
        return

    if node_names is None:
        node_names = {}
    assert isinstance(node_names, dict)

    if node_colors is None:
        node_colors = {}
    assert isinstance(node_colors, dict)

    # Default attributes for network nodes.
    node_attrs = {
        'shape': 'circle',
        'fontsize': '9',
        'height': '0.2',
        'width': '0.2'}

    dot = graphviz.Digraph(format=fmt, node_attr=node_attrs)

    # Inputs are always drawn, as filled boxes.
    inputs = set()
    for k in config.genome_config.input_keys:
        inputs.add(k)
        input_attrs = {'style': 'filled',
                       'shape': 'box',
                       'fillcolor': node_colors.get(k, 'lightgray')}
        dot.node(node_names.get(k, str(k)), _attributes=input_attrs)

    # Outputs are always drawn, as filled circles.
    outputs = set()
    for k in config.genome_config.output_keys:
        outputs.add(k)
        output_attrs = {'style': 'filled',
                        'fillcolor': node_colors.get(k, 'lightblue')}
        dot.node(node_names.get(k, str(k)), _attributes=output_attrs)

    if prune_unused:
        drawn_keys = set(cg.key for cg in genome.connections.values()
                         if cg.enabled or show_disabled)

        # Walk backwards from the outputs, collecting every node that feeds
        # (directly or indirectly) into one of them.
        used_nodes = copy.copy(outputs)
        pending = copy.copy(outputs)
        while pending:
            new_pending = set()
            for a, b in drawn_keys:
                if b in pending and a not in used_nodes:
                    new_pending.add(a)
                    used_nodes.add(a)
            pending = new_pending
    else:
        used_nodes = set(genome.nodes.keys())

    # Hidden nodes: everything in used_nodes that is neither input nor output.
    for n in used_nodes:
        if n in inputs or n in outputs:
            continue

        attrs = {'style': 'filled',
                 'fillcolor': node_colors.get(n, 'white')}
        # Use the same display name as the edges below so a custom name in
        # node_names does not produce a second, implicit node.
        dot.node(node_names.get(n, str(n)), _attributes=attrs)

    for cg in genome.connections.values():
        if not (cg.enabled or show_disabled):
            continue

        src_key, dst_key = cg.key
        # An edge to a pruned node would make graphviz re-create that node
        # implicitly, so drop edges touching pruned nodes.
        if prune_unused and (src_key not in used_nodes or dst_key not in used_nodes):
            continue

        a = node_names.get(src_key, str(src_key))
        b = node_names.get(dst_key, str(dst_key))
        style = 'solid' if cg.enabled else 'dotted'
        color = 'green' if cg.weight > 0 else 'red'
        width = str(0.1 + abs(cg.weight / 5.0))
        dot.edge(a, b, _attributes={'style': style, 'color': color, 'penwidth': width})

    dot.render(filename, view=view)

    return dot
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment