Last active
January 22, 2018 09:49
-
-
Save khuangaf/42bb710fd9d972341416b5ae2d2164a2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class CnnPolicy(object):
    """Convolutional actor-critic policy built as a TensorFlow 1.x graph.

    The observation placeholder has shape ``(nbatch,) + ob_space.shape``.
    Along the third axis of that placeholder, index 0 (first channel only)
    is split off as ``w0`` and the remaining entries are fed through three
    convolutions.  A constant column of ones is prepended along axis 1, the
    result is flattened and passed through a softmax to form the mean of the
    action distribution; a learned ``logstd`` variable supplies the spread.

    NOTE(review): the layout looks like a portfolio-allocation network where
    ``w0`` holds the previous weights and the column of ones is a cash
    asset -- confirm against the environment that produces the observations.

    Attributes set on the instance:
        X: observation placeholder.
        pi: softmax output used as the distribution mean.
        vf: value estimate, one scalar per batch row.
        pdtype / pd: distribution type and instance from ``make_pdtype``.
        initial_state: always ``None`` (the policy keeps no recurrent state).
        step: callable ``step(ob) -> (action, value, initial_state, neglogp)``.
        value: callable ``value(ob) -> value``.
    """

    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=True):
        """Build the policy graph inside the ``"model"`` variable scope.

        Args:
            sess: TensorFlow session used by ``step`` and ``value``.
            ob_space: observation space; only ``.shape`` is read.
            ac_space: action space; ``.shape[0]`` gives the action dimension
                and the space is passed to ``make_pdtype``.
            nbatch: fixed batch size baked into the placeholder shape.
            nsteps: unused here; kept for interface compatibility.
            reuse: forwarded to ``tf.variable_scope``.
        """
        ob_shape = (nbatch,) + ob_space.shape
        actdim = ac_space.shape[0]
        # One slot along this axis is reserved for w0 (see the slices below).
        window_length = ob_space.shape[1] - 1

        X = tf.placeholder(tf.float32, ob_shape, name='Ob')  # obs

        with tf.variable_scope("model", reuse=reuse) as scope:
            # Index 0 along axis 2, first channel only.
            w0 = tf.slice(X, [0, 0, 0, 0], [-1, -1, 1, 1])
            # Everything from index 1 onward along axis 2, all channels.
            x = tf.slice(X, [0, 0, 1, 0], [-1, -1, -1, -1])

            # The first layer may collide with variables that already exist
            # (e.g. when the model is rebuilt for testing).  In that case
            # switch the scope to reuse mode and build the layer again.
            # Only ValueError is caught -- that is what tf.get_variable
            # raises on a reuse conflict -- so unrelated failures and
            # KeyboardInterrupt are no longer swallowed.
            try:
                x = conv(tf.cast(x, tf.float32), 'c1', fh=1, fw=3, nf=3,
                         stride=1, init_scale=np.sqrt(2))
            except ValueError:
                scope.reuse_variables()
                x = conv(tf.cast(x, tf.float32), 'c1', fh=1, fw=3, nf=3,
                         stride=1, init_scale=np.sqrt(2))

            # Kernel width equals what remains of the window after 'c1'
            # (presumably VALID padding, so width shrinks by 2 -- TODO
            # confirm against the `conv` helper).
            x = conv(x, 'c2', fh=1, fw=window_length - 2, nf=20,
                     stride=window_length - 2, init_scale=np.sqrt(2))
            # Append w0 as an extra channel before the 1x1 scoring conv.
            x = tf.concat([x, w0], 3)
            x = conv(x, 'c3', fh=1, fw=1, nf=1, stride=1,
                     init_scale=np.sqrt(2))

            # Constant-one entry prepended along axis 1.
            cash_bias = tf.ones([x.shape[0], 1, 1, 1], tf.float32)
            c = tf.concat([cash_bias, x], 1)

            # Value head reads the conv output *without* the constant entry.
            v = conv_to_fc(x)
            vf = fc(v, 'v', 1)[:, 0]

            # Policy head: flatten and normalise to a probability vector.
            f = tf.contrib.layers.flatten(c)
            pi = tf.nn.softmax(f)

            logstd = tf.get_variable(
                name="logstd", shape=[1, actdim],
                initializer=tf.truncated_normal_initializer())

        # `pi * 0.0 + logstd` broadcasts logstd to pi's shape, so the flat
        # parameter vector is [mean, logstd] along axis 1.
        pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)

        self.pdtype = make_pdtype(ac_space)
        self.pd = self.pdtype.pdfromflat(pdparam)

        a0 = self.pd.sample()
        # Re-normalise the sample so the emitted action sums to one.
        a0 = tf.nn.softmax(a0)
        # NOTE(review): neglogp is evaluated on the softmaxed action, not on
        # the raw sample -- behaviour kept as in the original.
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            """Run the policy on `ob`; return (action, value, state, neglogp)."""
            a, v, neglogp = sess.run([a0, vf, neglogp0], {X: ob})
            return a, v, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            """Return the value estimate for `ob`."""
            return sess.run(vf, {X: ob})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment