Skip to content

Instantly share code, notes, and snippets.

View amoudgl's full-sized avatar

Abhinav Moudgil amoudgl

View GitHub Profile
@amoudgl
amoudgl / random_guessing.py
Created January 16, 2017 06:17
Random Guessing Algorithm for Cartpole Environment
# random guessing algorithm
# generate 10000 random configurations of the model's parameters and pick the one that achieves the best cumulative reward.
# optimize it for a weighted sum
import gym
from gym import wrappers
import numpy as np
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-random-guessing', force=True)
@amoudgl
amoudgl / hill_climbing.py
Created January 16, 2017 06:16
Hill Climbing Algorithm for Cartpole Environment
# hill climbing algorithm
# generate a random configuration of the parameters, add a small amount of noise to them, and evaluate the new configuration
# if the new configuration is better than the old one, discard the old one and accept the new one
# optimize it for a weighted sum
# returns the net episode reward
def get_episode_reward(env, observation, params):
t = 0
net_reward = 0
while (t < 1000):
@amoudgl
amoudgl / policy_gradient.py
Created January 16, 2017 06:15
Policy Gradient Algorithm for Cartpole Environment
# Monte Carlo policy gradient algorithm
# use a neural network to decide the policy
# from observations and rewards, update the parameters of the neural networks to optimize the policy
import numpy as np
import tensorflow as tf
import gym
from gym import wrappers
# initialize constants