TensorFlow Sequence Classification
# Example for my blog post at:
# https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/
import functools

import sets  # dataset helpers; provides the MNIST split used in main()
import tensorflow as tf


def lazy_property(function):
    # Cache the result so the graph nodes are only constructed once.
    attribute = '_' + function.__name__

    @property
    @functools.wraps(function)
    def wrapper(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)
    return wrapper


class SequenceClassification:

    def __init__(self, data, target, dropout, num_hidden=200, num_layers=3):
        self.data = data
        self.target = target
        self.dropout = dropout
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        # Touch the properties so the graph is built at construction time.
        self.prediction
        self.error
        self.optimize

    @lazy_property
    def prediction(self):
        # Recurrent network. Each layer needs its own cell object; reusing
        # one instance via [cell] * num_layers would share weights between
        # layers (and errors out in newer TensorFlow 1.x versions).
        cells = []
        for _ in range(self._num_layers):
            cell = tf.contrib.rnn.GRUCell(self._num_hidden)
            cell = tf.contrib.rnn.DropoutWrapper(
                cell, output_keep_prob=self.dropout)
            cells.append(cell)
        network = tf.contrib.rnn.MultiRNNCell(cells)
        output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
        # Select the output of the last time step.
        output = tf.transpose(output, [1, 0, 2])
        last = tf.gather(output, int(output.get_shape()[0]) - 1)
        # Softmax layer.
        weight, bias = self._weight_and_bias(
            self._num_hidden, int(self.target.get_shape()[1]))
        prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
        return prediction

    @lazy_property
    def cost(self):
        cross_entropy = -tf.reduce_sum(self.target * tf.log(self.prediction))
        return cross_entropy

    @lazy_property
    def optimize(self):
        learning_rate = 0.003
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        mistakes = tf.not_equal(
            tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
        return tf.reduce_mean(tf.cast(mistakes, tf.float32))

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)


def main():
    # We treat images as sequences of pixel rows.
    train, test = sets.Mnist()
    _, rows, row_size = train.data.shape
    num_classes = train.target.shape[1]
    data = tf.placeholder(tf.float32, [None, rows, row_size])
    target = tf.placeholder(tf.float32, [None, num_classes])
    dropout = tf.placeholder(tf.float32)
    model = SequenceClassification(data, target, dropout)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        for _ in range(100):
            batch = train.sample(10)
            sess.run(model.optimize, {
                data: batch.data, target: batch.target, dropout: 0.5})
        error = sess.run(model.error, {
            data: test.data, target: test.target, dropout: 1})
        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))


if __name__ == '__main__':
    main()
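One caveat about the cost property above: applying tf.log to the softmax output can produce NaNs once the network saturates. A more stable variant, shown as a sketch below (it assumes prediction is changed to return raw logits, i.e. the tf.nn.softmax call is dropped), uses TensorFlow's fused op:

    @lazy_property
    def cost(self):
        # Assumes self.prediction returns logits rather than probabilities;
        # the fused op computes softmax and cross-entropy in one stable step.
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.target, logits=self.prediction)
        return tf.reduce_sum(cross_entropy)

The error property would keep working unchanged, since tf.argmax gives the same result on logits as on probabilities.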
I have two more questions about this project, hoping that someone who still reads this blog can answer them.
- For a while I was sure about this, but now I'm a bit worried: do I understand correctly that one "row" in data represents the feature vector at time t, and the next row represents the feature vector at time t+1?
- Would it be possible to include an embedding layer below the GRU layers that transforms high-dimensional binary rows to a lower dimensionality, and if so, how? (See the sketch after this comment.)
Thank you in advance for any help.
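Regarding the embedding question: since the rows here are dense binary vectors rather than integer token IDs, a learned linear projection plays the role of an embedding. A minimal sketch, assuming the data placeholder from main() and a hypothetical embedding_size (neither the name nor the value comes from the gist); tf.layers.dense applies along the last axis of a 3-D tensor, so every time step is projected independently:

    embedding_size = 32  # hypothetical target dimensionality
    # Project each row (time step) from row_size down to embedding_size.
    embedded = tf.layers.dense(data, embedding_size, name='embedding')
    # Then feed `embedded` instead of `data` into the recurrent network:
    # output, _ = tf.nn.dynamic_rnn(network, embedded, dtype=tf.float32)

For inputs given as integer IDs instead, tf.nn.embedding_lookup on a trainable embedding matrix would be the usual choice.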
Hey guys, I noticed that last can return a zero tensor, not the real last hidden state. @danijar
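For the fixed-length MNIST rows in this gist, the final time step is always valid, so last is correct as written. The zero tensor usually shows up once padded variable-length sequences are fed together with a sequence_length argument, because tf.nn.dynamic_rnn outputs zeros past each sequence's end. A minimal sketch of selecting the true last output per example, assuming a length tensor of per-example sequence lengths (not part of this gist):

    def last_relevant(output, length):
        # output: [batch_size, max_time, num_hidden]; length: [batch_size],
        # int32. Picks output[b, length[b] - 1, :] for every example b.
        batch_size = tf.shape(output)[0]
        max_time = tf.shape(output)[1]
        num_hidden = int(output.get_shape()[2])
        index = tf.range(0, batch_size) * max_time + (length - 1)
        flat = tf.reshape(output, [-1, num_hidden])
        return tf.gather(flat, index)

This replaces the transpose-and-gather lines in prediction whenever sequences are padded.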