This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Random guessing algorithm
# Generate 10000 random configurations of the model's parameters and pick the one that achieves the best cumulative reward.
# Optimize it for the weighted sum.
# Third-party dependencies: gym supplies the environment and the Monitor
# wrapper; numpy is imported as np (its use lies outside this excerpt).
import gym
from gym import wrappers
import numpy as np

# Create the CartPole-v0 environment.
env = gym.make('CartPole-v0')
# Wrap the environment in a Monitor that writes to /tmp/cartpole-random-guessing.
# NOTE(review): force=True presumably allows reusing a directory that already
# holds monitor output -- confirm against the gym Monitor documentation.
env = wrappers.Monitor(env, '/tmp/cartpole-random-guessing', force=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hill climbing algorithm
# Generate a random configuration of the parameters, add a small amount of noise to the parameters, and evaluate the new parameter configuration.
# If the new configuration is better than the old one, discard the old one and accept the new one.
# Optimize it for the weighted sum.
# Returns the net episode reward.
def get_episode_reward(env, observation, params): | |
t = 0 | |
net_reward = 0 | |
while (t < 1000): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Monte Carlo policy gradient algorithm
# Use a neural network to decide the policy.
# From observations and rewards, update the parameters of the neural network to optimize the policy.
# Third-party dependencies of the policy-gradient script.
import numpy as np
import tensorflow as tf
import gym
from gym import wrappers
# initialize constants |
NewerOlder