Skip to content

Instantly share code, notes, and snippets.

@tsu-nera
Created June 9, 2017 22:30
Show Gist options
  • Save tsu-nera/9684a3019b30329ff89e40fa316a8406 to your computer and use it in GitHub Desktop.
Save tsu-nera/9684a3019b30329ff89e40fa316a8406 to your computer and use it in GitHub Desktop.
import gym
import numpy as np
from gym import wrappers
# Hyper-parameters of the evolutionary search.
n_epochs = 100  # how many cycles to make
pool_size = 100  # how many policies to maintain

# Directory handed to the Monitor wrapper.
# NOTE(review): the name says "cartpole" although the environment created
# below is FrozenLake -- confirm whether the path should be renamed.
MONITOR_DIR = '/tmp/cartpole-experiment-5'

env = wrappers.Monitor(gym.make("FrozenLake-v0"), MONITOR_DIR)
env.reset()

# Sizes of the discrete state and action spaces; they determine the genome
# length and the range of gene values used further down.
n_states = env.observation_space.n
n_actions = env.action_space.n
import random
from deap import base
from deap import creator
from deap import tools
# Single-objective fitness with weight +1.0, plus a list-based individual
# type that carries such a fitness.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Gene generator: a random integer in [0, n_actions - 1], i.e. one action id.
# (Despite the registered name "attr_bool", the gene is not a boolean.)
toolbox.register("attr_bool", random.randint, 0, n_actions - 1)

# An individual is n_states genes, one per state; a population is a plain
# list of individuals whose size is supplied at call time.
toolbox.register(
    "individual",
    tools.initRepeat,
    creator.Individual,
    toolbox.attr_bool,
    n_states,
)
toolbox.register(
    "population",
    tools.initRepeat,
    list,
    toolbox.individual,
)
# Evaluation method
def sample_reward(env, policy, t_max=100):
    """Run one episode following ``policy`` and return the summed reward.

    Args:
        env: Environment exposing ``reset()``, which returns the initial
            state, and ``step(action)``, which returns a 4-tuple of
            (next state, reward, done flag, extra value). The extra value
            is ignored.
        policy: Sequence indexed by state that yields the action to take.
        t_max: Maximum number of steps taken before the episode is cut off.

    Returns:
        The sum of the rewards collected during the episode (``0`` when no
        step is taken).
    """
    state = env.reset()
    total_reward = 0
    for _step in range(t_max):
        # The fourth element is unused; it gets its own name so that the
        # loop variable is not rebound by the unpacking.
        state, reward, is_done, _info = env.step(policy[state])
        total_reward += reward
        if is_done:
            break
    return total_reward
def evaluate(policy, n_times=100):
    """Estimate a policy's fitness as its mean episode reward.

    Calls ``sample_reward`` on the module-level ``env`` ``n_times`` times
    (with the default step limit) and averages the episode totals.

    Returns:
        A one-element tuple ``(mean_reward,)``. The tuple form matters:
        the result is assigned directly to ``fitness.values``.
    """
    episode_totals = []
    for _ in range(n_times):
        episode_totals.append(sample_reward(env, policy))
    mean_reward = float(np.mean(episode_totals))
    return (mean_reward,)
# Genetic operators used by the evolution loop.
toolbox.register("evaluate", evaluate)
# Uniform crossover: each gene position is swapped between the two parents
# with probability indpb.
toolbox.register("mate", tools.cxUniform, indpb=0.5)
# NOTE(review): mutShuffleIndexes only moves existing genes to other
# positions; it never draws a fresh action value. tools.mutUniformInt may be
# a better fit for integer action genes -- confirm before changing.
toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.1)
# The loop asks select() for as many individuals as the pool holds. With
# selBest that simply returns the whole population sorted, so nothing is ever
# selected out. Tournament selection samples with replacement and favours
# fitter policies, which gives the search actual selection pressure.
toolbox.register("select", tools.selTournament, tournsize=3)
##############
# Initialize
##############
pool = toolbox.population(n=pool_size)
# CXPB: probability that a pair of offspring is crossed over.
# MUTPB: probability that an offspring is mutated.
CXPB, MUTPB = 0.5, 0.1

# Everything below drives the environment, so the environment is closed in a
# ``finally`` clause: an interrupted or failing run still calls env.close().
try:
    # Score the initial population.
    fitnesses = list(map(toolbox.evaluate, pool))
    for policy, fit in zip(pool, fitnesses):
        policy.fitness.values = fit

    for g in range(n_epochs):
        print("-- %i th Generation --" % g)

        ##############
        # Select
        ##############
        # Pick as many individuals as the pool holds and work on clones of
        # them, so the variation operators below act on the copies.
        offspring = toolbox.select(pool, len(pool))
        offspring = list(map(toolbox.clone, offspring))

        ##############
        # crossover
        ##############
        # Pair up neighbours: (0, 1), (2, 3), ...
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1, child2)
                # The genomes changed, so the stored fitness is stale.
                del child1.fitness.values
                del child2.fitness.values

        ##############
        # mutation
        ##############
        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Re-evaluate only the individuals whose fitness was invalidated.
        invalid_policy = [
            policy for policy in offspring if not policy.fitness.valid
        ]
        fitnesses = map(toolbox.evaluate, invalid_policy)
        for policy, fit in zip(invalid_policy, fitnesses):
            policy.fitness.values = fit

        # The offspring replace the whole pool.
        pool[:] = offspring

        # ``best_score`` holds the best *individual* of this generation; its
        # score is ``best_score.fitness.values``.
        best_score = tools.selBest(pool, 1)[0]
        print("best score: %s, %s" % (best_score,
                                      best_score.fitness.values))
finally:
    env.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment