
@breeko
Created May 8, 2017 02:09
Evolutionary Algorithm for solving CartPole
import numpy as np
import gym


def evolve(W, sigma, prob_mutate=0.5):
    """Return a mutated copy of W: each weight is perturbed with probability prob_mutate."""
    rand = (np.random.randn(*W.shape) - 0.5) * sigma
    mutate = np.random.choice([0, 1], size=W.shape, p=(1 - prob_mutate, prob_mutate))
    return W + (mutate * rand)
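
# Minimal sanity-check sketch of `evolve` (the name `_example_W` is illustrative only):
# mutation preserves the weight shape, and with prob_mutate=0.5 roughly half of the
# entries of a zero matrix are perturbed.
_example_W = evolve(np.zeros((4, 2)), sigma=0.1, prob_mutate=0.5)
assert _example_W.shape == (4, 2)
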
def evaluate(W, num_trials=1, max_t=1000, render=False):
    """Run num_trials episodes with the linear policy W and return a list of episode rewards."""
    cum_rewards = []
    for num_trial in range(num_trials):
        state = env.reset().reshape(1, -1)
        t = 0
        rewards = 0
        while t < max_t:
            if render:
                env.render()
            t += 1
            action = np.argmax(state.dot(W)) % env.action_space.n
            new_state, reward, done, info = env.step(action)
            state = new_state.reshape(1, -1)
            rewards += reward
            if done:
                break
        cum_rewards.append(rewards)
    if render:
        env.render(close=True)
    return cum_rewards
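
# Usage sketch (illustrative only; assumes the `env` created below is in scope):
#   scores = evaluate(np.random.random((4, 2)), num_trials=3)
#   print("mean score: {:0.2f}".format(np.mean(scores)))
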
NUM_EPOCHS = 1000
NUM_TRIALS = 10 # Number of trials per epoch to score individual
NUM_INDIVIDUALS = 10 # Number of individuals in each epoch
SIGMA_MAX = 1. # Maximum sigma for mutations
SIGMA_MIN = 1e-2 # Minimum sigma for mutations (mutations decrease as score approaches goal score)
PROB_MUTATE = 1 # Probability a characteristic will be mutated
PROB_MUTATE_DECAY = 0.99 # Factor that reduces probability of mutation
NUM_NEURONS = 1 # Number of output columns per action in the weight matrix
GOAL_SCORE = 195 # CartPole-v0 is considered solved at an average reward of 195
GOAL_TRIALS = 100 # ...measured over the last 100 trials
UPLOAD = False # Whether to upload results to the OpenAI Gym scoreboard
sigma = SIGMA_MAX
env = gym.make('CartPole-v0')
env.seed(0)
np.random.seed(0)
if UPLOAD:
    from key import api_key
    env = gym.wrappers.Monitor(env, directory="videos", force=True)
best_so_far = -float("inf")
best_W = np.random.random(size=(env.observation_space.shape[0], env.action_space.n * NUM_NEURONS))
cum_rewards = []
cum_trials = 0
for num_epoch in range(NUM_EPOCHS):
    if num_epoch % 10 == 0:
        print("Epoch: {} / {}, last {} scores {:0.2f}".format(
            num_epoch, NUM_EPOCHS, GOAL_TRIALS, np.mean(cum_rewards[-GOAL_TRIALS:])))
    # Generate a population of candidates by mutating the best weights found so far
    Ws = [evolve(best_W, sigma=sigma, prob_mutate=PROB_MUTATE) for _ in range(NUM_INDIVIDUALS)]
    for W in Ws:
        scores = evaluate(W, num_trials=NUM_TRIALS)
        cum_trials += NUM_TRIALS
        cum_rewards.extend(scores)
        mean_score = np.mean(scores)
        if mean_score > best_so_far:
            best_W = W
            PROB_MUTATE *= PROB_MUTATE_DECAY
            best_so_far = mean_score
            # Shrink sigma toward SIGMA_MIN as the best score approaches GOAL_SCORE
            sigma = max(SIGMA_MIN, min(SIGMA_MAX, SIGMA_MIN + (GOAL_SCORE - best_so_far) / GOAL_SCORE * SIGMA_MAX))
            print("New best: {:0.2f}".format(best_so_far))
    if np.mean(cum_rewards[-GOAL_TRIALS:]) >= GOAL_SCORE:
        print("Training complete in {} trials".format(cum_trials))
        break
env.close()
env = env.env.env
if UPLOAD:
    gym.upload("./videos/", api_key=api_key)
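
# Optional: watch the best policy found. A minimal sketch, assuming `best_W` and
# `evaluate` above are in scope; a fresh environment is created because the
# training env was closed above.
env = gym.make('CartPole-v0')
render_scores = evaluate(best_W, num_trials=1, render=True)
print("Rendered episode reward: {:0.2f}".format(render_scores[0]))
env.close()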