# Example for my blog post at:
# https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/
import functools
import sets
import tensorflow as tf


def lazy_property(function):
    attribute = '_' + function.__name__

    @property
    @functools.wraps(function)
    def wrapper(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)
    return wrapper


class SequenceClassification:

    def __init__(self, data, target, dropout, num_hidden=200, num_layers=3):
        self.data = data
        self.target = target
        self.dropout = dropout
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        self.prediction
        self.error
        self.optimize

    @lazy_property
    def prediction(self):
        # Recurrent network.
        network = tf.contrib.rnn.GRUCell(self._num_hidden)
        network = tf.contrib.rnn.DropoutWrapper(
            network, output_keep_prob=self.dropout)
        network = tf.contrib.rnn.MultiRNNCell([network] * self._num_layers)
        output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
        # Select last output.
        output = tf.transpose(output, [1, 0, 2])
        last = tf.gather(output, int(output.get_shape()[0]) - 1)
        # Softmax layer.
        weight, bias = self._weight_and_bias(
            self._num_hidden, int(self.target.get_shape()[1]))
        prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
        return prediction

    @lazy_property
    def cost(self):
        cross_entropy = -tf.reduce_sum(self.target * tf.log(self.prediction))
        return cross_entropy

    @lazy_property
    def optimize(self):
        learning_rate = 0.003
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        mistakes = tf.not_equal(
            tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
        return tf.reduce_mean(tf.cast(mistakes, tf.float32))

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)


def main():
    # We treat images as sequences of pixel rows.
    train, test = sets.Mnist()
    _, rows, row_size = train.data.shape
    num_classes = train.target.shape[1]
    data = tf.placeholder(tf.float32, [None, rows, row_size])
    target = tf.placeholder(tf.float32, [None, num_classes])
    dropout = tf.placeholder(tf.float32)
    model = SequenceClassification(data, target, dropout)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        for _ in range(100):
            batch = train.sample(10)
            sess.run(model.optimize, {
                data: batch.data, target: batch.target, dropout: 0.5})
        error = sess.run(model.error, {
            data: test.data, target: test.target, dropout: 1})
        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))


if __name__ == '__main__':
    main()
Hey Joelkr, instead of using
network = tf.contrib.rnn.GRUCell(self._num_hidden)
network = tf.contrib.rnn.DropoutWrapper(
    network, output_keep_prob=self.dropout)
network = tf.contrib.rnn.MultiRNNCell([network] * self._num_layers)
define each cell separately and then join them together so they don't share variables, like the following:
cells = []
for _ in range(self._num_layers):
    cell = tf.contrib.rnn.GRUCell(self._num_hidden)  # Or LSTMCell(num_units)
    cell = tf.contrib.rnn.DropoutWrapper(
        cell, output_keep_prob=self.dropout)  # self.dropout is fed as the keep probability in main()
    cells.append(cell)
network = tf.contrib.rnn.MultiRNNCell(cells)
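For reference, this is how the whole prediction property reads with the per-layer construction dropped in (a sketch that assumes the rest of the gist stays unchanged):

@lazy_property
def prediction(self):
    # Recurrent network: build a fresh cell per layer so weights are not shared.
    cells = []
    for _ in range(self._num_layers):
        cell = tf.contrib.rnn.GRUCell(self._num_hidden)
        cell = tf.contrib.rnn.DropoutWrapper(
            cell, output_keep_prob=self.dropout)
        cells.append(cell)
    network = tf.contrib.rnn.MultiRNNCell(cells)
    output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
    # Select last output.
    output = tf.transpose(output, [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    # Softmax layer.
    weight, bias = self._weight_and_bias(
        self._num_hidden, int(self.target.get_shape()[1]))
    return tf.nn.softmax(tf.matmul(last, weight) + bias)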
Hey guys, I saw that last returns a zero tensor, not the real last hidden state:
output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
# Select last output.
output = tf.transpose(output, [1, 0, 2])
last = tf.gather(output, int(output.get_shape()[0]) - 1)
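That usually happens when sequence_length is passed to tf.nn.dynamic_rnn: outputs past each sequence's length are padded with zeros, so gathering the final time step returns zeros for shorter sequences. (In this gist every MNIST sequence is exactly rows time steps long, so the transpose-and-gather is fine.) A sketch of selecting the last valid output per example, assuming a length tensor of shape [batch] that this gist does not define:

output, _ = tf.nn.dynamic_rnn(
    network, self.data, dtype=tf.float32, sequence_length=length)
# output has shape [batch, max_time, num_hidden]; pick index length - 1 per example.
batch_size = tf.shape(output)[0]
max_time = tf.shape(output)[1]
num_hidden = int(output.get_shape()[2])
index = tf.range(batch_size) * max_time + (length - 1)
flat = tf.reshape(output, [-1, num_hidden])
last = tf.gather(flat, index)  # [batch, num_hidden]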
I have two more questions about this project, and I hope someone who is still reading this blog can answer them.
- For a while I was sure about this, but now I'm a bit worried: do I understand it correctly that one "row" in data represents a feature vector at time t, and the next row represents a feature vector at time t+1?
- Would it be possible to include an embedding layer below the GRU layers that transforms high-dimensional binary rows into a lower-dimensional representation, and if so, how? (One option is sketched below.)
Thank you in advance for any help.
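On the first question: yes, data in this gist has shape [batch, time, features], so the row at index t along axis 1 is the feature vector fed to the network at time step t, and the next row is fed at time step t+1. On the second question, one possible approach, sketched under the assumption that each row is a dense binary vector rather than a token id, is to project every time step with a shared dense layer before dynamic_rnn; embedding_size is a made-up hyperparameter:

embedding_size = 64  # assumed value, not part of the gist
# tf.layers.dense acts on the last axis, so the same projection is shared
# across time steps: [batch, time, row_size] -> [batch, time, embedding_size].
embedded = tf.layers.dense(self.data, embedding_size)
output, _ = tf.nn.dynamic_rnn(network, embedded, dtype=tf.float32)

If the rows were integer token ids instead, tf.nn.embedding_lookup with a trainable embedding matrix would be the usual choice.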
I'm trying to learn TensorFlow and am running version 1.2.1. Every time I try to run an example with a construction like:
network = tf.contrib.rnn.GRUCell(self._num_hidden)
network = tf.contrib.rnn.DropoutWrapper(
    network, output_keep_prob=self.dropout)
network = tf.contrib.rnn.MultiRNNCell([network] * self._num_layers)
I get an error like:
ValueError: Trying to share variable rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel, but specified shape (400, 400) and found shape (228, 400).
If I remove the DropoutWrapper() call, everything is fine. I can't seem to find any example that doesn't do this, so I'm having trouble learning how to construct a cell with dropout.
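That error comes from [network] * self._num_layers reusing one cell object for every layer: the first layer's GRU gates kernel has shape (row_size + num_hidden, 2 * num_hidden) = (228, 400), while the upper layers expect (400, 400), and TensorFlow 1.2 rejects sharing one variable across those mismatched shapes. A construction that avoids it, along the lines of the earlier comment (a sketch using the gist's names, with a hypothetical helper gru_with_dropout):

def gru_with_dropout():
    # Hypothetical helper: returns a fresh GRU cell wrapped in dropout.
    cell = tf.contrib.rnn.GRUCell(self._num_hidden)
    return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.dropout)

network = tf.contrib.rnn.MultiRNNCell(
    [gru_with_dropout() for _ in range(self._num_layers)])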