Abhinav Moudgil amoudgl

amoudgl / random_guessing.py

Created January 16, 2017 06:17

Random Guessing Algorithm for Cartpole Environment

	# random guessing algorithm
	# generate 10000 random configurations of the model's parameters and pick the one that achieves the best cumulative reward.
	# optimize it for weighted sum

	import gym
	from gym import wrappers
	import numpy as np

	# Create the CartPole-v0 environment.
	env = gym.make('CartPole-v0')
	# Wrap the environment in a Monitor that uses /tmp/cartpole-random-guessing as its output directory.
	# NOTE(review): force=True presumably allows overwriting earlier monitor files in that directory — confirm against the gym docs.
	env = wrappers.Monitor(env, '/tmp/cartpole-random-guessing', force=True)

amoudgl / hill_climbing.py

Created January 16, 2017 06:16

Hill Climbing Algorithm for Cartpole Environment

	# hill climbing algorithm
	# generate a random configuration of the parameters, add a small amount of noise to the parameters, and evaluate the new parameter configuration
	# if the new configuration is better than the old one, discard the old one and accept the new one
	# optimize it for weighted sum

	# returns the net episode reward
	def get_episode_reward(env, observation, params):
	t = 0
	net_reward = 0
	while (t < 1000):

amoudgl / policy_gradient.py

Created January 16, 2017 06:15

Policy Gradient Algorithm for Cartpole Environment

	# monte carlo policy gradient algorithm
	# use a neural network to decide the policy
	# from observations and rewards, update the parameters of the neural network to optimize the policy

	import numpy as np
	import tensorflow as tf
	import gym
	from gym import wrappers

	# initialize constants