quq99 / pg-baseline.py
Created March 1, 2017 04:58
An improved PG algorithm for Atari Pong
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
import numpy as np
import cPickle as pickle
import gym
import copy
# hyperparameters
H = 200 # number of hidden layer neurons
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-4
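
The preview cuts off at the hyperparameters. Below is a minimal sketch, not the gist's own code, of the variance-reduction step an "improved" PG script typically adds over basic pg-pong: discounted returns with a baseline subtracted and normalized. All names and values here are illustrative assumptions.

import numpy as np

gamma = 0.99  # discount factor for reward (assumed value)

def discount_rewards(r):
    """Compute discounted returns for a 1D array of per-step rewards."""
    discounted = np.zeros_like(r, dtype=np.float64)
    running_add = 0.0
    for t in reversed(range(len(r))):
        if r[t] != 0:
            running_add = 0.0  # Pong-specific: a nonzero reward ends a rally
        running_add = running_add * gamma + r[t]
        discounted[t] = running_add
    return discounted

returns = discount_rewards(np.array([0.0, 0.0, 1.0, 0.0, -1.0]))
# subtracting a baseline (here the batch mean) and normalizing reduces
# gradient variance without biasing the policy gradient
advantages = (returns - returns.mean()) / (returns.std() + 1e-8)
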
quq99 / CEM.py
Created February 16, 2017 14:43
CEM for Acrobot-v1
import numpy as np
import gym
from gym.spaces import Discrete, Box
from gym import wrappers
# ===================================
# Policies
# ===================================
class DeterministicDiscreteActionLinearPolicy(object):
    def __init__(self, theta, ob_space, ac_space):
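
The preview stops inside __init__. For context, here is a minimal sketch of the cross-entropy method loop such a script runs around the policy. Names are assumptions; f is taken to evaluate one flat parameter vector by rolling out an episode and returning its total reward.

import numpy as np

def cem(f, theta_mean, theta_std, n_iter=50, batch_size=25, elite_frac=0.2):
    """Cross-entropy method: sample parameters from a Gaussian, keep the
    top elite_frac by episode return, and refit the Gaussian to the elites."""
    n_elite = int(batch_size * elite_frac)
    for _ in range(n_iter):
        thetas = theta_mean + theta_std * np.random.randn(batch_size, theta_mean.size)
        rewards = np.array([f(th) for th in thetas])
        elite = thetas[rewards.argsort()[-n_elite:]]  # best-performing samples
        theta_mean = elite.mean(axis=0)
        theta_std = elite.std(axis=0)
    return theta_mean
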
quq99 / CEM.py
Created February 16, 2017 14:26
CEM algorithm for CartPole-v0
import numpy as np
import gym
from gym.spaces import Discrete, Box
from gym import wrappers
# ===================================
# Policies
# ===================================
class DeterministicDiscreteActionLinearPolicy(object):
    def __init__(self, theta, ob_space, ac_space):
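
The preview again cuts off at __init__. A plausible completion of this linear policy, in the style of the standard CEM lab code this gist appears to follow (treat the reshaping details as assumptions): theta is unpacked into a weight matrix and bias, and the action with the largest linear score is chosen.

import numpy as np

class DeterministicDiscreteActionLinearPolicy(object):
    def __init__(self, theta, ob_space, ac_space):
        dim_ob = ob_space.shape[0]   # observation dimension
        n_actions = ac_space.n       # number of discrete actions
        assert len(theta) == (dim_ob + 1) * n_actions
        self.W = theta[0:dim_ob * n_actions].reshape(dim_ob, n_actions)
        self.b = theta[dim_ob * n_actions:].reshape(1, n_actions)

    def act(self, ob):
        """Pick the action whose linear score ob.W + b is largest."""
        y = ob.dot(self.W) + self.b
        return y.argmax()
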
quq99 / pg.py
Created February 16, 2017 14:02
A PG algorithm for Acrobot-v1
import numpy as np
import os
os.environ["THEANO_FLAGS"]="device=cpu,floatX=float64"
import theano, theano.tensor as T
import gym
from gym import wrappers
def discount(x, gamma):
    """
    Given vector x, computes a vector y such that
    y[i] = x[i] + gamma * x[i+1] + gamma^2 * x[i+2] ...
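
The docstring is truncated by the preview. A standard completion of this helper evaluates the backward recursion y[i] = x[i] + gamma * y[i+1] in one pass with scipy's lfilter; treat this as a sketch of the usual implementation, not necessarily the gist's exact body.

import numpy as np
from scipy.signal import lfilter

def discount(x, gamma):
    """
    Given vector x, computes a vector y such that
    y[i] = x[i] + gamma * x[i+1] + gamma^2 * x[i+2] ...
    """
    # lfilter with a = [1, -gamma] implements y[n] = x[n] + gamma * y[n-1];
    # reversing the input and output turns that into the backward sum above
    return lfilter([1.0], [1.0, -gamma], x[::-1])[::-1]
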
quq99 / DQN.py
Created February 15, 2017 11:38
A DQN implementation solving CartPole-v0
import gym
from gym import wrappers
import tensorflow as tf
import numpy as np
import random
from collections import deque
# Hyper Parameters for DQN
GAMMA = 0.9 # discount factor for target Q
INITIAL_EPSILON = 0.5 # starting value of epsilon
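
The preview ends at the hyperparameters. A minimal sketch (assumed names and values, not the gist's code) of the two mechanisms they control: epsilon-greedy exploration, with epsilon annealed from INITIAL_EPSILON, and uniform minibatch sampling from the replay deque.

import random
from collections import deque
import numpy as np

REPLAY_SIZE = 10000  # assumed replay-buffer capacity
BATCH_SIZE = 32      # assumed minibatch size

replay_buffer = deque(maxlen=REPLAY_SIZE)  # holds (s, a, r, s', done) tuples

def epsilon_greedy(q_values, epsilon):
    """With probability epsilon pick a random action, else the greedy one."""
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return int(np.argmax(q_values))

def sample_minibatch():
    """Uniformly sample stored transitions for one Q-learning update."""
    return random.sample(replay_buffer, BATCH_SIZE)
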