-
-
Save danijar/ff8b4b81da55c99b5096913c4953d29b to your computer and use it in GitHub Desktop.
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import numpy as np | |
import tensorflow as tf | |
class GRU(tf.contrib.rnn.RNNCell):
    """Gated recurrent cell built on the module-level `_forward` helper.

    The update and reset gates come from a single projection of the state
    and the input; the candidate state comes from a second projection of
    the reset-scaled state and the input. Each projection is normalized by
    `normalizer` (layer normalization by default).

    Args:
      size: Number of units; used as both the state and the output size.
      activation: Activation applied to the candidate state.
      reuse: Forwarded to the base class as `_reuse`.
      normalizer: Normalizer handed to `_forward` for every projection.
      initializer: Weight initializer handed to `_forward`.
    """

    def __init__(
            self, size, activation=tf.tanh, reuse=None,
            normalizer=tf.contrib.layers.layer_norm,
            initializer=tf.contrib.layers.xavier_initializer()):
        super(GRU, self).__init__(_reuse=reuse)
        self._size = size
        self._activation = activation
        self._normalizer = normalizer
        self._initializer = initializer

    @property
    def state_size(self):
        """Size of the recurrent state."""
        return self._size

    @property
    def output_size(self):
        """Size of the cell output; identical to the state size."""
        return self._size

    def call(self, input_, state):
        """Advance the cell one step and return `(output, new_state)`.

        The output and the new state are the same tensor.
        """
        # One projection of width 2 * size yields both gates; its bias
        # starts at -1 and the sigmoid is applied before splitting.
        gates = self._forward(
            'update_reset', [state, input_], 2 * self._size, tf.nn.sigmoid,
            bias_initializer=tf.constant_initializer(-1.))
        update, reset = tf.split(gates, 2, 1)
        # The reset gate scales the previous state for the candidate.
        candidate = self._forward(
            'candidate', [reset * state, input_], self._size,
            self._activation)
        # Interpolate between the previous state and the candidate.
        new_state = (1 - update) * state + update * candidate
        return new_state, new_state

    def _forward(self, name, inputs, size, activation, **kwargs):
        """Call the module-level `_forward` inside variable scope `name`.

        The cell's normalizer and weight initializer are always passed;
        extra keyword arguments are forwarded unchanged.
        """
        with tf.variable_scope(name):
            projected = _forward(
                inputs, size, activation,
                normalizer=self._normalizer,
                weight_initializer=self._initializer,
                **kwargs)
        return projected
def _forward(
        inputs, size, activation, normalizer=tf.contrib.layers.layer_norm,
        weight_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.zeros_initializer()):
    """Project every input to `size` units, average, add bias, activate.

    Each input is flattened with `tf.contrib.layers.flatten`, multiplied by
    its own weight matrix and, if a normalizer is given, normalized on its
    own. The per-input results are averaged, one shared bias is added and
    the activation is applied. If the inputs agree on leading non-batch
    dimensions, the result is reshaped to expose those dimensions again.

    Args:
      inputs: A tensor or a tuple/list of tensors.
      size: Width of each weight matrix's output and of the bias.
      activation: Callable applied after the bias; a falsy value skips it.
      normalizer: Callable applied to each projection; a falsy value
        skips normalization.
      weight_initializer: Initializer for the `weight_<n>` variables.
      bias_initializer: Initializer for the bias variable.

    Returns:
      The resulting tensor.
    """
    if not isinstance(inputs, (tuple, list)):
        inputs = (inputs,)
    shapes = []
    outputs = []
    # Map each input to individually normalize their outputs.
    for index, input_ in enumerate(inputs):
        # Remember the dimensions between the first and the last axis so
        # that shared ones can be restored after the projection.
        shapes.append(input_.shape[1: -1].as_list())
        input_ = tf.contrib.layers.flatten(input_)
        weight = tf.get_variable(
            'weight_{}'.format(index + 1), (int(input_.shape[1]), size),
            tf.float32, weight_initializer)
        output = tf.matmul(input_, weight)
        if normalizer:
            output = normalizer(output)
        outputs.append(output)
    output = tf.reduce_mean(outputs, 0)
    # Add bias after normalization.
    # NOTE(review): the bias variable is created under the name 'weight'.
    # The name is kept as is because renaming it would change the variable
    # names stored in existing checkpoints.
    bias = tf.get_variable(
        'weight', (size,), tf.float32, bias_initializer)
    output += bias
    # Activation function.
    if activation:
        output = activation(output)
    # Restore shape dimensions that are consistent among inputs. Only all
    # but one of the recorded dimensions of the shortest input are compared.
    min_dim = min(len(shape[1:]) for shape in shapes)
    dim_shapes = [[shape[dim] for shape in shapes] for dim in range(min_dim)]
    # Count the leading dimensions on which every input agrees.
    agreement = 0
    for sizes in dim_shapes:
        if len(set(sizes)) != 1:
            break
        agreement += 1
    if agreement:
        # Only needed when reshaping; cast so the target shape holds plain
        # Python integers rather than NumPy scalars.
        remaining = int(sum(np.prod(shape[agreement:]) for shape in shapes))
        batch_size = output.shape[0].value or -1
        # The agreed dimensions are identical for all inputs, so take them
        # from the first one. Slicing `shapes` itself would nest whole
        # shape lists inside the target shape, which tf.reshape rejects.
        shape = [batch_size] + shapes[0][:agreement] + [remaining]
        output = tf.reshape(output, shape)
    return output
I am trying to use this GRU cell and I am getting the error below:
File "rnn.py", line 94, in _forward
output = tf.reshape(output, shape)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 3938, in reshape
"Reshape", tensor=tensor, shape=shape, name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 513, in _apply_op_helper
raise err
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 510, in _apply_op_helper
preferred_dtype=default_dtype)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 926, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/constant_op.py", line 229, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/constant_op.py", line 208, in constant
value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_util.py", line 472, in make_tensor_proto
"supported type." % (type(values), values))
TypeError: Failed to convert object of type <class 'list'> to Tensor. Contents: [None, -1]. Consider casting elements to a supported type.
The variables just before the error are:
dim = 0
shapes = [[], []]
output = Tensor("rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/gru/update_reset/Sigmoid:0", shape=(?, 400), dtype=float32)
shape = [None, -1]
I am trying to test this using the sequence classification code from https://gist.github.com/danijar/c7ec9a30052127c7a1ad169eeb83f159, which has a hidden layer length of 200.
Thanks for reporting and sorry for finding this comment so late. The code used to only work with static batch size. I just updated it to also work when the batch size is unknown.
Nice work on this one.