Skip to content

Instantly share code, notes, and snippets.

@khuangaf
Last active January 22, 2018 09:49
Show Gist options
  • Save khuangaf/42bb710fd9d972341416b5ae2d2164a2 to your computer and use it in GitHub Desktop.
Save khuangaf/42bb710fd9d972341416b5ae2d2164a2 to your computer and use it in GitHub Desktop.
class CnnPolicy(object):
    """Convolutional actor-critic policy built as a TensorFlow graph.

    The constructor creates an observation placeholder, a three-layer
    convolutional trunk, a scalar value head and a softmax policy head, and
    exposes ``step`` / ``value`` callables that evaluate them in ``sess``.

    Attributes set on the instance:
        X: observation placeholder of shape ``(nbatch,) + ob_space.shape``.
        pi: softmax output of the policy head.
        vf: value estimate, one scalar per batch row.
        pdtype / pd: action distribution type and instance built from
            ``pi`` and a learned ``logstd`` variable.
        initial_state: always ``None`` (returned unchanged by ``step``).
        step: ``step(ob) -> (action, value, initial_state, neglogp)``.
        value: ``value(ob) -> value``.
    """

    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=True):
        """Build the policy graph.

        Args:
            sess: session used by ``step`` and ``value`` to run the graph.
            ob_space: observation space; only ``.shape`` is read. The slicing
                below requires the batched observation to be rank 4.
            ac_space: action space; ``.shape[0]`` gives the action dimension
                and the space is passed to ``make_pdtype``.
            nbatch: batch size prepended to ``ob_space.shape``.
            nsteps: unused here; kept for interface compatibility.
            reuse: passed to ``tf.variable_scope("model", reuse=...)``.
        """
        ob_shape = (nbatch,) + ob_space.shape
        actdim = ac_space.shape[0]
        # Index 0 along axis 2 of the batched observation is split off as
        # ``w0`` below, so the remaining window is one element shorter.
        window_length = ob_space.shape[1] - 1
        X = tf.placeholder(tf.float32, ob_shape, name='Ob')  # obs

        with tf.variable_scope("model", reuse=reuse) as scope:
            # First entry along axis 2 (channel 0 only); concatenated back in
            # as an extra feature channel before the last convolution.
            # NOTE(review): presumably the previous portfolio weights - confirm.
            w0 = tf.slice(X, [0, 0, 0, 0], [-1, -1, 1, 1])
            # Everything after that first entry, all channels.
            window = tf.slice(X, [0, 0, 1, 0], [-1, -1, -1, -1])

            def first_conv():
                return conv(tf.cast(window, tf.float32), 'c1', fh=1, fw=3,
                            nf=3, stride=1, init_scale=np.sqrt(2))

            # Reuse when testing: if creating the variables conflicts with
            # the current reuse setting, tf.get_variable raises ValueError;
            # switch the scope to reuse mode and retry once.
            try:
                x = first_conv()
            except ValueError:
                scope.reuse_variables()
                x = first_conv()

            # Kernel width window_length - 2 is meant to span what remains of
            # the window after the width-3 'c1' layer.
            # NOTE(review): assumes conv uses VALID padding - confirm.
            x = conv(x, 'c2', fh=1, fw=window_length - 2, nf=20,
                     stride=window_length - 2, init_scale=np.sqrt(2))
            x = tf.concat([x, w0], 3)
            # 1x1 convolution down to a single output channel.
            x = conv(x, 'c3', fh=1, fw=1, nf=1, stride=1,
                     init_scale=np.sqrt(2))

            # Constant all-ones entry prepended along axis 1, so the policy
            # head has one more output than the convolutional trunk.
            cash_bias = tf.ones([x.shape[0], 1, 1, 1], tf.float32)
            c = tf.concat([cash_bias, x], 1)

            # Value head reads the trunk output without the cash entry.
            v = conv_to_fc(x)
            vf = fc(v, 'v', 1)[:, 0]

            # Policy head: flatten and normalise with a softmax.
            f = tf.contrib.layers.flatten(c)
            pi = tf.nn.softmax(f)

            logstd = tf.get_variable(
                name="logstd", shape=[1, actdim],
                initializer=tf.truncated_normal_initializer())

        # ``pi * 0.0 + logstd`` broadcasts logstd to pi's batch shape.
        # NOTE(review): requires actdim to equal pi's last dimension - confirm.
        pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)

        self.pdtype = make_pdtype(ac_space)
        self.pd = self.pdtype.pdfromflat(pdparam)

        a0 = self.pd.sample()
        a0 = tf.nn.softmax(a0)
        # NOTE(review): neglogp is evaluated at the softmaxed action, not at
        # the raw sample drawn from the distribution - confirm this is intended.
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            """Return (action, value, initial_state, neglogp) for ``ob``."""
            a, v, neglogp = sess.run([a0, vf, neglogp0], {X: ob})
            return a, v, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            """Return the value estimate for ``ob``."""
            return sess.run(vf, {X: ob})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment