quq99 / pg-baseline.py
Created March 1, 2017 04:58
An improved PG algorithm for Atari Pong
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
import numpy as np
import cPickle as pickle
import gym
import copy
# hyperparameters
H = 200 # number of hidden layer neurons
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-4
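
The preview cuts off at the hyperparameters. Below is a minimal sketch, not the gist's own code, of the variance-reduction step an "improved" PG script typically adds over basic pg-pong: discounted returns with a baseline subtracted and normalized. All names and values here are illustrative assumptions.

import numpy as np

gamma = 0.99  # discount factor for reward (assumed value)

def discount_rewards(r):
    """Compute discounted returns for a 1D array of per-step rewards."""
    discounted = np.zeros_like(r, dtype=np.float64)
    running_add = 0.0
    for t in reversed(range(len(r))):
        if r[t] != 0:
            running_add = 0.0  # Pong-specific: a nonzero reward ends a rally
        running_add = running_add * gamma + r[t]
        discounted[t] = running_add
    return discounted

returns = discount_rewards(np.array([0.0, 0.0, 1.0, 0.0, -1.0]))
# subtracting a baseline (here the batch mean) and normalizing reduces
# gradient variance without biasing the policy gradient
advantages = (returns - returns.mean()) / (returns.std() + 1e-8)
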
quq99 / CEM.py
Created February 16, 2017 14:43
CEM for Acrobot-v1
import numpy as np
import gym
from gym.spaces import Discrete, Box
from gym import wrappers
# ===================================
# Policies
# ===================================
class DeterministicDiscreteActionLinearPolicy(object):
    def __init__(self, theta, ob_space, ac_space):
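
The preview stops inside __init__. For context, here is a minimal sketch of the cross-entropy method loop such a script runs around the policy. Names are assumptions; f is taken to evaluate one flat parameter vector by rolling out an episode and returning its total reward.

import numpy as np

def cem(f, theta_mean, theta_std, n_iter=50, batch_size=25, elite_frac=0.2):
    """Cross-entropy method: sample parameters from a Gaussian, keep the
    top elite_frac by episode return, and refit the Gaussian to the elites."""
    n_elite = int(batch_size * elite_frac)
    for _ in range(n_iter):
        thetas = theta_mean + theta_std * np.random.randn(batch_size, theta_mean.size)
        rewards = np.array([f(th) for th in thetas])
        elite = thetas[rewards.argsort()[-n_elite:]]  # best-performing samples
        theta_mean = elite.mean(axis=0)
        theta_std = elite.std(axis=0)
    return theta_mean
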
quq99 / CEM.py
Created February 16, 2017 14:26
CEM algorithm for CartPole-v0
import numpy as np
import gym
from gym.spaces import Discrete, Box
from gym import wrappers
# ===================================
# Policies
# ===================================
class DeterministicDiscreteActionLinearPolicy(object):
    def __init__(self, theta, ob_space, ac_space):
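
The preview again cuts off at __init__. A plausible completion of this linear policy, in the style of the standard CEM lab code this gist appears to follow (treat the reshaping details as assumptions): theta is unpacked into a weight matrix and bias, and the action with the largest linear score is chosen.

import numpy as np

class DeterministicDiscreteActionLinearPolicy(object):
    def __init__(self, theta, ob_space, ac_space):
        dim_ob = ob_space.shape[0]   # observation dimension
        n_actions = ac_space.n       # number of discrete actions
        assert len(theta) == (dim_ob + 1) * n_actions
        self.W = theta[0:dim_ob * n_actions].reshape(dim_ob, n_actions)
        self.b = theta[dim_ob * n_actions:].reshape(1, n_actions)

    def act(self, ob):
        """Pick the action whose linear score ob.W + b is largest."""
        y = ob.dot(self.W) + self.b
        return y.argmax()
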
quq99 / pg.py
Created February 16, 2017 14:02
A PG algorithm for Acrobot-v1
import numpy as np
import os
os.environ["THEANO_FLAGS"]="device=cpu,floatX=float64"
import theano, theano.tensor as T
import gym
from gym import wrappers
def discount(x, gamma):
    """
    Given vector x, computes a vector y such that
    y[i] = x[i] + gamma * x[i+1] + gamma^2 * x[i+2] ...
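
The docstring is truncated by the preview. A standard completion of this helper evaluates the backward recursion y[i] = x[i] + gamma * y[i+1] in one pass with scipy's lfilter; treat this as a sketch of the usual implementation, not necessarily the gist's exact body.

import numpy as np
from scipy.signal import lfilter

def discount(x, gamma):
    """
    Given vector x, computes a vector y such that
    y[i] = x[i] + gamma * x[i+1] + gamma^2 * x[i+2] ...
    """
    # lfilter with a = [1, -gamma] implements y[n] = x[n] + gamma * y[n-1];
    # reversing the input and output turns that into the backward sum above
    return lfilter([1.0], [1.0, -gamma], x[::-1])[::-1]
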
quq99 / DQN.py
Created February 15, 2017 11:38
A DQN implementation solving CartPole-v0
import gym
from gym import wrappers
import tensorflow as tf
import numpy as np
import random
from collections import deque
# Hyper Parameters for DQN
GAMMA = 0.9 # discount factor for target Q
INITIAL_EPSILON = 0.5 # starting value of epsilon
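
The preview ends at the hyperparameters. A minimal sketch (assumed names and values, not the gist's code) of the two mechanisms they control: epsilon-greedy exploration, with epsilon annealed from INITIAL_EPSILON, and uniform minibatch sampling from the replay deque.

import random
from collections import deque
import numpy as np

REPLAY_SIZE = 10000  # assumed replay-buffer capacity
BATCH_SIZE = 32      # assumed minibatch size

replay_buffer = deque(maxlen=REPLAY_SIZE)  # holds (s, a, r, s', done) tuples

def epsilon_greedy(q_values, epsilon):
    """With probability epsilon pick a random action, else the greedy one."""
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return int(np.argmax(q_values))

def sample_minibatch():
    """Uniformly sample stored transitions for one Q-learning update."""
    return random.sample(replay_buffer, BATCH_SIZE)
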