-
-
Save danijar/c7ec9a30052127c7a1ad169eeb83f159 to your computer and use it in GitHub Desktop.
# Example for my blog post at:
# https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/
import functools

import sets
import tensorflow as tf
def lazy_property(function):
    """Turn a method into a read-only property that is evaluated only once.

    On first access the wrapped function is called and its result is stored
    on the instance under ``'_' + function.__name__``. Every later access
    returns the stored value without calling the function again.

    Args:
        function: A method that takes only ``self``.

    Returns:
        A ``property`` whose getter wraps ``function``.
    """
    attribute = '_' + function.__name__

    @property
    @functools.wraps(function)
    def wrapper(self):
        # The presence of the attribute is the "already computed" flag, so a
        # cached result of None is still treated as cached.
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)

    return wrapper
class SequenceClassification:
    """Stacked-GRU classifier that assigns one label to each input sequence.

    The graph is built from three tensors supplied by the caller:

    * ``data``: the input sequences. It is passed to ``tf.nn.dynamic_rnn``
      with the default batch-major layout, i.e. ``[batch, time, features]``.
      The time dimension must be statically known because the last time step
      is selected via the static shape.
    * ``target``: one row per example; its second (static) dimension gives
      the number of classes.
    * ``dropout``: despite the name, this value is used as the *keep*
      probability of each layer's outputs (feed 1 to disable dropout).

    Args:
        data: Input tensor/placeholder as described above.
        target: Target tensor/placeholder as described above.
        dropout: Scalar tensor/placeholder with the output keep probability.
        num_hidden: Number of units in each GRU layer.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, data, target, dropout, num_hidden=200, num_layers=3):
        self.data = data
        self.target = target
        self.dropout = dropout
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        # Access the lazy properties once so that all ops and variables are
        # added to the graph during construction.
        self.prediction
        self.error
        self.optimize

    @lazy_property
    def prediction(self):
        """Softmax class probabilities computed from the last time step."""
        # Recurrent network. Each layer needs its own cell object: reusing a
        # single instance (``[cell] * n``) makes the layers share variables
        # and fails when the layer input sizes differ.
        cells = []
        for _ in range(self._num_layers):
            cell = tf.contrib.rnn.GRUCell(self._num_hidden)
            cell = tf.contrib.rnn.DropoutWrapper(
                cell, output_keep_prob=self.dropout)
            cells.append(cell)
        network = tf.contrib.rnn.MultiRNNCell(cells)
        output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
        # Select last output: move time to the leading axis, then gather the
        # final time step using the static sequence length.
        output = tf.transpose(output, [1, 0, 2])
        last = tf.gather(output, int(output.get_shape()[0]) - 1)
        # Softmax layer.
        weight, bias = self._weight_and_bias(
            self._num_hidden, int(self.target.get_shape()[1]))
        prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
        return prediction

    @lazy_property
    def cost(self):
        """Cross entropy between ``target`` and ``prediction``, summed."""
        # Clip before the log so that a probability that underflows to zero
        # cannot turn the loss into NaN (0 * -inf).
        clipped = tf.clip_by_value(self.prediction, 1e-10, 1.0)
        cross_entropy = -tf.reduce_sum(self.target * tf.log(clipped))
        return cross_entropy

    @lazy_property
    def optimize(self):
        """Training op: one RMSProp step minimising ``cost``."""
        learning_rate = 0.003
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        """Fraction of examples whose arg-max prediction differs from target."""
        mistakes = tf.not_equal(
            tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
        return tf.reduce_mean(tf.cast(mistakes, tf.float32))

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        """Create the variables of a dense layer.

        Args:
            in_size: Number of input features.
            out_size: Number of output units.

        Returns:
            A ``(weight, bias)`` pair of ``tf.Variable`` objects; the weight
            is initialised from a truncated normal (stddev 0.01) and the bias
            is initialised to the constant 0.1.
        """
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)
def main():
    """Train the sequence classifier on MNIST and print the test error.

    Runs 10 epochs of 100 mini-batches (10 examples each) and, after every
    epoch, evaluates the error on the full test set with dropout disabled.
    """
    # We treat images as sequences of pixel rows.
    train, test = sets.Mnist()
    _, rows, row_size = train.data.shape
    num_classes = train.target.shape[1]
    data = tf.placeholder(tf.float32, [None, rows, row_size])
    target = tf.placeholder(tf.float32, [None, num_classes])
    # Fed as the keep probability of the recurrent layers' outputs.
    dropout = tf.placeholder(tf.float32)
    model = SequenceClassification(data, target, dropout)
    # The context manager releases the session's resources on exit, including
    # when training is interrupted by an exception.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(10):
            for _ in range(100):
                batch = train.sample(10)
                sess.run(model.optimize, {
                    data: batch.data, target: batch.target, dropout: 0.5})
            # Keep probability 1 turns dropout off for evaluation.
            error = sess.run(model.error, {
                data: test.data, target: test.target, dropout: 1})
            print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
# Run the training script only when executed directly, not when imported.
if __name__ == '__main__':
    main()
I tried to do this with my own data, which is a NumPy array with 30 rows, where each row is a vector of 20 elements. However, the scikit-learn datasets are very different from my own, so I was not able to adapt this code. The main error comes from data.shape and target.shape. When I delete the data and target and manually define num_classes, the next error comes when I try to run the session, and it gives me this error: AttributeError: 'numpy.ndarray' object has no attribute 'target'
I'm trying to learn tensorflow and am running version 1.2.1. Every time I try to run any example with a construction like:
network = tf.contrib.rnn.GRUCell(self._num_hidden)
network = tf.contrib.rnn.DropoutWrapper(
network, output_keep_prob=self.dropout)
network = tf.contrib.rnn.MultiRNNCell([network] * self._num_layers)
I get an error like:
ValueError: Trying to share variable rnn/multi_rnn_cell/cell_0/gru_cell/gates/kernel, but specified shape (400, 400) and found shape (228, 400).
If I remove the DropoutWrapper() call, everything is fine. I can't seem to find any example that doesn't do this, so I'm having trouble learning how to construct a cell with dropout.
Hey Joelkr, instead of using
network = tf.contrib.rnn.GRUCell(self._num_hidden)
network = tf.contrib.rnn.DropoutWrapper(
network, output_keep_prob=self.dropout)
network = tf.contrib.rnn.MultiRNNCell([network] * self._num_layers)
define each cell separately and then join them together so they don't share variables, like the following:
cells = []
for _ in range(self._num_layers):
cell = tf.contrib.rnn.GRUCell(self._num_hidden) # Or LSTMCell(num_units)
cell = tf.contrib.rnn.DropoutWrapper(
cell, output_keep_prob=1.0 - self.dropout)
cells.append(cell)
network = tf.contrib.rnn.MultiRNNCell(cells)
Hey guys, I saw that `last` returns a zero tensor, not the real last hidden state.
output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
# Select last output.
output = tf.transpose(output, [1, 0, 2])
last = tf.gather(output, int(output.get_shape()[0]) - 1)
I have two more questions about this project, hoping that someone who's still reading this blog would be able to answer.
- For a while I was sure about this, but now I'm a bit worried: do I understand correctly that one "row" in data represents a feature vector at time t, and the next row represents a feature vector at time t+1?
- Would it be possible to include an embedding layer below the GRU layers that would transform rows of binaries with high dimensionality (size) to a lower dimensionality, and if so, how?
Thank you in advance for any help.
Hi, great blog post, thank you!
Shouldn't lines 35-38 be replaced with
to fix dropout (which is currently reversed) and to give each layer its own weights?