Created
May 27, 2016 18:33
-
-
Save PuZZleDucK/4a6b877964a0e67648b88bee05eeebf2 to your computer and use it in GitHub Desktop.
Simple Genetic Algorithm Atari simulation for the OpenAI Gym
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# Simple Genetic Algorithm Atari simulation
import gym | |
import numpy as np | |
import sys | |
from pybrain.tools.shortcuts import buildNetwork | |
import random | |
# Seed genome: generation zero is filled with copies of this string. Each
# character is later converted with int() and passed to env.step() as one action.
genisis = "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"

# Evolution parameters.
children_per_gen = 200  # mutants evaluated in every generation
survivors_per_gen = 10  # best-scoring genomes kept as parents for the next generation
generations = 100  # number of generations to run
max_time = 15000  # upper bound on environment steps per evaluation
def mutate_single_gene(action_string):
    """Return a copy of ``action_string`` with one randomly chosen gene replaced.

    A position is picked uniformly at random and overwritten with a random
    action digit in the range 0-5 (the new digit may equal the old one). The
    genome length never changes.

    An empty genome has no position to overwrite, so it is returned unchanged
    instead of raising IndexError from choosing out of an empty range.
    """
    if not action_string:
        return action_string
    new_action = random.randrange(6)
    new_location = random.randrange(len(action_string))
    return action_string[:new_location] + str(new_action) + action_string[new_location + 1:]
def mutate_add_genome(action_string):
    """Return ``action_string`` with five new random genes spliced in.

    Five action digits (each in the range 0-5) are generated and inserted as
    one contiguous run in front of a randomly chosen existing position, so the
    result is always five characters longer than the input.

    For an empty genome there is no existing position to choose from; the new
    genes are inserted at index 0 instead of raising IndexError.
    """
    new_actions = "".join(str(random.randrange(6)) for _ in range(5))
    # Non-empty genomes keep the original choice of insertion points
    # (0 .. len-1); only the empty case needs a fixed fallback.
    splice_location = random.randrange(len(action_string)) if action_string else 0
    return action_string[:splice_location] + new_actions + action_string[splice_location:]
def mutate_duplicate_genome(action_string):
    """Return ``action_string`` with a copy of one of its own segments inserted.

    A segment of up to five genes starting at a random position is copied
    (fewer than five when the start is near the end of the string) and
    inserted in front of a second, independently chosen random position.

    An empty genome has nothing to copy and is returned unchanged instead of
    raising IndexError.
    """
    if not action_string:
        return action_string
    copy_location = random.randrange(len(action_string))
    splice_location = random.randrange(len(action_string))
    segment = action_string[copy_location:copy_location + 5]
    return action_string[:splice_location] + segment + action_string[splice_location:]
def mutate_delete_genome(action_string):
    """Return ``action_string`` with up to five consecutive genes removed.

    Deletion starts at a random position and removes five genes, or fewer when
    the position is within five characters of the end.

    The result is never empty: if the deletion would remove every gene (only
    possible for genomes of five genes or fewer), the input is returned
    unchanged. An empty genome would make the evaluation loop compute
    ``step % 0``. An empty input is likewise returned unchanged instead of
    raising IndexError.
    """
    if not action_string:
        return action_string
    delete_location = random.randrange(len(action_string))
    shortened = action_string[:delete_location] + action_string[delete_location + 5:]
    # Fall back to the untouched genome rather than hand back an empty one.
    return shortened or action_string
def mutate_scramble_genome(action_string):
    """Return ``action_string`` with one short segment randomly reordered.

    A window of up to five genes starting at a random position is shuffled in
    place (the window is shorter near the end of the string). The length and
    the multiset of genes are unchanged.

    An empty genome is returned unchanged instead of raising IndexError.
    """
    if not action_string:
        return action_string
    shuffle_location = random.randrange(len(action_string))
    segment = action_string[shuffle_location:shuffle_location + 5]
    # Sampling the whole segment without replacement yields a random permutation.
    new_segment = random.sample(segment, len(segment))
    return action_string[:shuffle_location] + "".join(new_segment) + action_string[shuffle_location + 5:]
def mutate(action_string):
    """Apply one randomly selected mutation operator to ``action_string``.

    A roll in 0-9 selects the operator:

    * 0-1 (20%): no mutation, the input is returned as-is
    * 2-4 (30%): ``mutate_single_gene``
    * 5-6 (20%): ``mutate_add_genome``
    * 7   (10%): ``mutate_duplicate_genome``
    * 8   (10%): ``mutate_delete_genome``
    * 9   (10%): ``mutate_scramble_genome``

    NOTE(review): earlier inline comments advertised 10% "no mutation" and 20%
    "shuffle"; the thresholds actually implement the split listed above.
    Confirm which distribution was intended before changing the thresholds.
    """
    roll = random.randrange(10)
    if roll < 2:
        return action_string
    if roll < 5:
        return mutate_single_gene(action_string)
    if roll < 7:
        return mutate_add_genome(action_string)
    if roll == 7:
        return mutate_duplicate_genome(action_string)
    if roll == 8:
        return mutate_delete_genome(action_string)
    return mutate_scramble_genome(action_string)
def get_survivors(last_generation):
    """Return the trailing ``survivors_per_gen`` entries of ``last_generation``.

    The slice is taken from the end of the list, so callers are expected to
    pass a list whose best individuals come last.
    """
    tail_start = -survivors_per_gen
    return last_generation[tail_start:]
def sort_generation(last_generation):
    """Rank a scored generation and return its genomes from worst to best.

    Args:
        last_generation: iterable of ``(score, child)`` tuples.

    Returns:
        A list of the ``child`` values ordered by ascending score (best last).
        Ties keep their input order because the sort is stable and keyed on
        the score only.

    Side effect: prints the best-scoring ``(score, child)`` tuples, best
    first. At most five are printed; generations with fewer than five
    individuals print what they have instead of raising IndexError.
    """
    sorted_gen = sorted(last_generation, key=lambda tup: tup[0])
    for rank, entry in enumerate(reversed(sorted_gen[-5:])):
        template = " :: top 5: {}" if rank == 0 else " :: : {}"
        print(template.format(entry))
    return [child for _score, child in sorted_gen]
def _evaluate_genome(env, genome):
    """Play one episode driven by ``genome`` and return the accumulated reward.

    The genome is replayed cyclically: step ``t`` uses the action digit at
    index ``t % len(genome)``. The episode stops when the environment reports
    ``done`` or after ``max_time`` steps. ``genome`` must be non-empty.
    """
    env.reset()
    total_reward = 0
    number_of_actions = len(genome)
    for step in range(max_time):
        env.render()
        next_action = int(genome[step % number_of_actions])
        _observation, reward, done, _info = env.step(next_action)
        total_reward = total_reward + reward
        if done:
            break
    return total_reward


if __name__ == '__main__':
    # Generation zero: every individual is the seed genome.
    last_generation = [genisis] * children_per_gen
    env = gym.make('Qbert-v0')
    env.monitor.start('/tmp/atari-experiment-7', force=True)
    try:
        env.reset()
        for generation in range(generations):
            survivors = get_survivors(last_generation)
            this_generation = []
            for child in range(children_per_gen):
                # Parents are assigned round-robin over the survivors.
                parent = survivors[child % len(survivors)]
                # An empty genome cannot be replayed (step % 0), so fall back
                # to the parent if a mutation ever deletes every gene.
                mutant = mutate(parent) or parent
                total_reward = _evaluate_genome(env, mutant)
                print(" :: mutant {}: {} -- {}.".format(str(child).zfill(3), mutant, total_reward))
                this_generation.append((total_reward, mutant))
            last_generation = sort_generation(this_generation)
    finally:
        # Always close the monitor, even if a run is interrupted or fails.
        env.monitor.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you so much! Based on your method, I created an even simpler one (only using crossover and single mutation) :)
https://github.com/whusym/gym_practice/blob/master/simpleGA.py