Evolutionary Algorithm for solving CartPole
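In outline, this is a random-mutation hill climb over the weights of a linear policy: keep the best weight matrix found so far, spawn a population of mutated copies each epoch, adopt any copy whose mean score beats the incumbent, and shrink the mutation size as the score approaches the goal. The toy below shows the same keep-the-best loop on a simple quadratic fitness instead of CartPole (illustrative only; every name here is local to the sketch):

import numpy as np

# Toy version of the keep-the-best loop, maximizing -||w||^2 (optimum at w = 0)
rng = np.random.default_rng(0)
best = rng.standard_normal(4)
best_score = -np.sum(best ** 2)
for epoch in range(200):
    for _ in range(10):                  # a small population of mutated copies
        candidate = best + 0.1 * rng.standard_normal(4)
        score = -np.sum(candidate ** 2)
        if score > best_score:           # greedy selection: keep any improvement
            best, best_score = candidate, score
print("best fitness:", best_score)       # climbs toward 0

The gist itself follows.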
import numpy as np
import gym

def evolve(W, sigma, prob_mutate=0.5):
    """Return a copy of W with each weight mutated with probability prob_mutate."""
    rand = np.random.randn(*W.shape) * sigma  # zero-mean Gaussian noise scaled by sigma
    mutate = np.random.choice([0, 1], size=W.shape, p=(1 - prob_mutate, prob_mutate))
    return W + (mutate * rand)
def evaluate(W, num_trials=1, max_t=1000, render=False):
    """Run the linear policy W for num_trials episodes and return the episode rewards."""
    cum_rewards = []
    for num_trial in range(num_trials):
        state = env.reset().reshape(1, -1)
        t = 0
        rewards = 0
        while t < max_t:
            if render:
                env.render()
            t += 1
            # Linear policy: pick the action whose weighted score is highest
            # (the modulo folds the argmax back to a valid action when NUM_NEURONS > 1)
            action = np.argmax(state.dot(W)) % env.action_space.n
            new_state, reward, done, info = env.step(action)
            state = new_state.reshape(1, -1)
            rewards += reward
            if done:
                break
        cum_rewards.append(rewards)
    if render:
        env.render(close=True)
    return cum_rewards
NUM_EPOCHS = 1000
NUM_TRIALS = 10          # Number of trials per epoch to score an individual
NUM_INDIVIDUALS = 10     # Number of individuals in each epoch
SIGMA_MAX = 1.0          # Maximum sigma for mutations
SIGMA_MIN = 1e-2         # Minimum sigma for mutations (mutations shrink as the score approaches the goal)
PROB_MUTATE = 1          # Probability a weight will be mutated
PROB_MUTATE_DECAY = 0.99 # Factor that reduces the probability of mutation after each improvement
NUM_NEURONS = 1
GOAL_SCORE = 195         # CartPole-v0 counts as solved at an average of 195 over 100 trials
GOAL_TRIALS = 100
UPLOAD = False           # Whether to upload results to the OpenAI Gym scoreboard

sigma = SIGMA_MAX
env = gym.make('CartPole-v0')
env.seed(0)
np.random.seed(0)

if UPLOAD:
    from key import api_key
    env = gym.wrappers.Monitor(env, directory="videos", force=True)

best_so_far = -float("inf")
best_W = np.random.random(size=(env.observation_space.shape[0], env.action_space.n * NUM_NEURONS))
cum_rewards = []
cum_trials = 0
for num_epoch in range(NUM_EPOCHS):
    if num_epoch % 10 == 0 and cum_rewards:
        print("Epoch: {} / {}, last {} scores {:0.2f}".format(
            num_epoch, NUM_EPOCHS, GOAL_TRIALS, np.mean(cum_rewards[-GOAL_TRIALS:])))
    Ws = [evolve(best_W, sigma=sigma, prob_mutate=PROB_MUTATE) for _ in range(NUM_INDIVIDUALS)]
    for W in Ws:
        scores = evaluate(W, num_trials=NUM_TRIALS)
        cum_trials += NUM_TRIALS
        cum_rewards.extend(scores)
        mean_score = np.mean(scores)
        if mean_score > best_so_far:
            best_W = W
            PROB_MUTATE *= PROB_MUTATE_DECAY
            best_so_far = mean_score
            # Anneal sigma linearly from SIGMA_MAX toward SIGMA_MIN as best_so_far approaches GOAL_SCORE
            sigma = max(SIGMA_MIN, min(SIGMA_MAX, SIGMA_MIN + (GOAL_SCORE - best_so_far) / GOAL_SCORE * SIGMA_MAX))
            print("New best: {:0.2f}".format(best_so_far))
    if np.mean(cum_rewards[-GOAL_TRIALS:]) >= GOAL_SCORE:
        print("Training complete in {} trials".format(cum_trials))
        break
env.close()
if UPLOAD:
    gym.upload("./videos/", api_key=api_key)
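If the run converges, a quick way to watch the evolved policy is to rebuild the environment and call evaluate with rendering on (a sketch, assuming the script above has already run and best_W holds the winning weights):

# Sketch: watch the evolved linear policy for a few episodes
env = gym.make("CartPole-v0")
demo_scores = evaluate(best_W, num_trials=3, render=True)
print("Demo scores:", demo_scores)
env.close()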