Last active
May 5, 2018 09:08
-
-
Save simonnanty/bf56dd6ae3cc9566cd3f004431ff512d to your computer and use it in GitHub Desktop.
Implementation of Minimal RNN [M. Chen, 2017]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
from tensorflow.python.ops import math_ops, init_ops | |
from tensorflow.python.ops.rnn_cell_impl import RNNCell, _linear | |
class MinimalRNNCell(RNNCell):
  """Minimal RNN cell where the input transformation Phi is a multi-layer perceptron.

  This implementation is based on:
    Minmin Chen (2017)
    "MinimalRNN: Toward More Interpretable and Trainable Recurrent Neural Networks"
    https://arxiv.org/abs/1711.06788.pdf

  Each step maps the input through an MLP (Phi) to a candidate `z`, then
  blends it with the previous state via a learned update gate `u`:
  `h_t = u * h_{t-1} + (1 - u) * z`.
  """

  def __init__(self,
               num_units,
               activation=None,
               kernel_initializer=None,
               bias_initializer=None):
    """Initialize the parameters for a cell.

    Args:
      num_units: list of int, layer sizes for Phi. The last entry is the
        state/output size of the cell.
      activation: (optional) nonlinearity applied after each Phi layer.
        Defaults to `tanh`.
      kernel_initializer: (optional) The initializer to use for the weight and
        projection matrices.
      bias_initializer: (optional) The initializer to use for the bias matrices.
        Default: vectors of ones (for the update gate only; Phi layers keep
        the `_linear` default when this is None).
    """
    # NOTE(review): `_reuse=True` is hard-coded, forcing variable reuse on
    # every call — presumably so repeated __call__ invocations share one set
    # of weights; confirm against the target TF version's scoping rules.
    super(MinimalRNNCell, self).__init__(_reuse=True)
    self._activation = activation or math_ops.tanh
    self._num_units = num_units
    self._kernel_initializer = kernel_initializer
    self._bias_initializer = bias_initializer

  @property
  def state_size(self):
    # State width equals the last (output) layer of Phi.
    return self._num_units[-1]

  @property
  def output_size(self):
    # Output is the new state itself, so it has the same width.
    return self._num_units[-1]

  def __call__(self, inputs, state, scope=None):
    """Run one step of minimal RNN.

    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: a state Tensor, `2-D, batch x state_size`.
      scope: unused; present for RNNCell call-signature compatibility.

    Returns:
      A tuple containing:
      - A `2-D, [batch x num_units]`, Tensor representing the output of the
        cell after reading `inputs` when previous state was `state`.
      - A `2-D, [batch x num_units]`, Tensor representing the new state of cell
        after reading `inputs` when the previous state was `state`. Same type
        and shape(s) as `state`.

    Raises:
      ValueError:
        - If input size cannot be inferred from inputs via
          static shape inference.
        - If state is not `2D`.
    """
    # Phi projection to a latent space / candidate: feed `inputs` through the
    # MLP, one variable scope per layer so each layer gets its own weights.
    z = inputs
    for i, layer_size in enumerate(self._num_units):
      with tf.variable_scope("phi_" + str(i)):
        z = self._activation(_linear(
            z,
            layer_size,
            True,
            bias_initializer=self._bias_initializer,
            kernel_initializer=self._kernel_initializer))
    # Update gate: u = sigmoid(W [h_{t-1}, z] + b). The bias defaults to ones
    # so the gate initially favors retaining the previous state (as in GRU/LSTM
    # forget-gate initialization).
    bias_ones = self._bias_initializer
    if self._bias_initializer is None:
      bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
    with tf.variable_scope("update_gate"):
      u = math_ops.sigmoid(_linear(
          [state, z],  # gate is conditioned on the previous state and z
          self._num_units[-1],
          True,
          bias_initializer=bias_ones,
          kernel_initializer=self._kernel_initializer))
    # Activation step: convex combination of old state and candidate.
    new_h = u * state + (1 - u) * z
    # RNNCell contract: return (output, new_state); here they coincide.
    return new_h, new_h
Glad you find it useful.
You're absolutely right about line 76. It should be the state, not the inputs (since z is already a transformation of the inputs).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, thank you for opening your code example.
I forked your code example to a new repository for personal study and evaluation.
https://github.com/rhee-airilab/minmin-minimalrnn
BTW, I guess line 76 must be
`[state, z]`
rather than `[inputs, z]`,
according to [M. Chen, 2017].