from keras import backend as K
from keras import initializations, regularizers, constraints  # Keras 1.x names; in Keras 2 use `initializers`
from keras.engine.topology import Layer


class AttentionWithContext(Layer):
    """
    Attention operation, with a context/query vector, for temporal data.
    Supports masking.
    Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf]
    "Hierarchical Attention Networks for Document Classification"
    by using a context vector to assist the attention.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.

    Just put it on top of an RNN layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
    The dimensions are inferred based on the output shape of the RNN.

    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(AttentionWithContext())
    """
    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = initializations.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.u_regularizer = regularizers.get(u_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.u_constraint = constraints.get(u_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(AttentionWithContext, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3

        self.W = self.add_weight((input_shape[-1], input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight((input_shape[-1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)

        self.u = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_u'.format(self.name),
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)

        super(AttentionWithContext, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        # project the hidden states: uit = tanh(W . h + b)
        uit = K.dot(x, self.W)
        if self.bias:
            uit += self.b
        uit = K.tanh(uit)

        # similarity of each timestep with the context vector u
        # (a 3D-by-1D K.dot works on the Theano backend; TensorFlow may need a workaround, see comments below)
        ait = K.dot(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases, especially in the early stages of training, the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def get_output_shape_for(self, input_shape):
        # Keras 1.x name for the shape-inference hook
        return input_shape[0], input_shape[-1]

    def compute_output_shape(self, input_shape):
        """Shape transformation logic so Keras can infer the output shape (Keras 2 name)."""
        return input_shape[0], input_shape[-1]
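To make the computation in call() easier to follow, here is a minimal NumPy sketch of the same attention mechanism with the tensor shapes written out. This is an illustrative stand-in (plain NumPy, masking ignored), not part of the layer itself; the names x, W, b, u mirror the layer's weights.

import numpy as np

def attention_with_context_numpy(x, W, b, u):
    """NumPy sketch of AttentionWithContext.call() with shapes annotated.

    x: (samples, steps, features) -- RNN output with return_sequences=True
    W: (features, features), b: (features,), u: (features,)
    returns: (samples, features)
    """
    uit = np.tanh(np.dot(x, W) + b)                 # (samples, steps, features)
    ait = np.dot(uit, u)                            # (samples, steps): similarity with context vector
    a = np.exp(ait)
    a /= np.sum(a, axis=1, keepdims=True) + 1e-7    # softmax over the steps axis
    weighted = x * a[..., np.newaxis]               # (samples, steps, features)
    return weighted.sum(axis=1)                     # (samples, features)

# quick shape check
x = np.random.randn(2, 5, 4)
W, b, u = np.random.randn(4, 4), np.zeros(4), np.random.randn(4)
print(attention_with_context_numpy(x, W, b, u).shape)  # (2, 4)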
Is the issue "IndexError: pop index out of range" resolved?
Just tried rmdort's fork. The issue reported by abali96 disappears! Also, he added tensor shapes in comments, which helps in understanding what happens under the hood.
I am getting:
Traceback (most recent call last):
File "test.py", line 25, in <module>
from attention import AttentionWithContext
File "/ssd/MachineLearning/Python/NLP/SplitAndSpellSentence/attention.py", line 1, in <module>
class AttentionWithContext(Layer):
NameError: name 'Layer' is not defined
The code is simple:
model = Sequential()
model.add(recurrent.GRU(hidden_neurons, input_shape=(CONFIG.max_input_wordchunk_len, len(chars)),
                        return_sequences=True,
                        kernel_initializer=CONFIG.initialization, activation='linear'))
model.add(AttentionWithContext())
model.add(Dense(len(chars), activation='sigmoid', kernel_initializer=CONFIG.initialization))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
Edit: My bad. Adding from keras.engine.topology import Layer resolved it.
I don't know why, but I'm getting a dimension error.
Code:
def generate_model(output_len, chars=None):
    """Generate the model"""
    print('Building model...')
    chars = chars or CHARS
    in_out_neurons = CONFIG.max_input_len
    hidden_neurons = CONFIG.hidden_size
    model = Sequential()
    model.add(recurrent.GRU(512, input_shape=(128, 100),
                            return_sequences=True,
                            kernel_initializer=CONFIG.initialization, activation='linear'))
    model.add(AttentionWithContext())
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    return model
and the error is:
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
gru_1 (GRU)                  (None, 128, 512)          941568
_________________________________________________________________
attention_with_context_1 (At (None, 512)               263168
=================================================================
Total params: 1,204,736
Trainable params: 1,204,736
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/500
Traceback (most recent call last):
File "test.py", line 580, in <module>
train_speller()
File "test.py", line 482, in train_speller
itarative_train(model)
File "test.py", line 467, in itarative_train
class_weight=None, max_queue_size=10, workers=1)
File "/ssd/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/ssd/anaconda3/lib/python3.6/site-packages/keras/models.py", line 1315, in fit_generator
initial_epoch=initial_epoch)
File "/ssd/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 2230, in fit_generator
class_weight=class_weight)
File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1877, in train_on_batch
class_weight=class_weight)
File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1480, in _standardize_user_data
exception_prefix='target')
File "/ssd/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 113, in _standardize_input_data
'with shape ' + str(data_shape))
ValueError: Error when checking target: expected attention_with_context_1 to have 2 dimensions, but got array with shape (64, 128, 100)
Any idea?
As the output shape is 3-dimensional anyway, I tried changing the return in compute_output_shape to
return (input_shape[0], input_shape[1], input_shape[2])
but then I get a different error and the model does not compile.
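For context, the error above comes from the targets rather than the layer: AttentionWithContext collapses the steps axis, so the model outputs (samples, features) while the generator is feeding targets of shape (64, 128, 100). Changing compute_output_shape to report a 3D shape cannot fix this, because call() still sums over the steps axis. Below is a minimal sketch of the two usual resolutions, assuming AttentionWithContext from the gist above is importable and the same 128x100 inputs as generate_model(); the Dense sizes are illustrative, not from the original code.

from keras.models import Sequential
from keras.layers import Dense, TimeDistributed, recurrent

# Option 1: keep AttentionWithContext and train against 2D targets (samples, n_classes)
model = Sequential()
model.add(recurrent.GRU(512, input_shape=(128, 100), return_sequences=True))
model.add(AttentionWithContext())             # -> (samples, 512)
model.add(Dense(100, activation='softmax'))   # -> (samples, 100); targets must be (samples, 100)
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Option 2: if a prediction per timestep is needed (targets of shape (samples, 128, 100)),
# do not collapse the sequence with attention; use TimeDistributed instead.
model2 = Sequential()
model2.add(recurrent.GRU(512, input_shape=(128, 100), return_sequences=True))
model2.add(TimeDistributed(Dense(100, activation='softmax')))  # -> (samples, 128, 100)
model2.compile(loss='categorical_crossentropy', optimizer='adam')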
I have adapted the code for TensorFlow and Keras 2. Here is the fork:
https://gist.github.com/rmdort/596e75e864295365798836d9e8636033
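For anyone who wants to stay on this gist rather than switch to the fork: on the TensorFlow backend, the 3D-by-1D K.dot in call() (ait = K.dot(uit, self.u)) is the usual sticking point. A common workaround, sketched here as the kind of change such Keras 2 / TensorFlow ports make (not necessarily the fork's exact code), is a small dot-product helper:

from keras import backend as K

def dot_product(x, kernel):
    """Wrapper around K.dot that also works on the TensorFlow backend.

    x: 3D tensor (samples, steps, features); kernel: 1D weight vector (features,).
    """
    if K.backend() == 'tensorflow':
        # K.dot of a 3D tensor with a 1D vector is problematic on TensorFlow,
        # so expand the kernel to (features, 1) and squeeze the result back.
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    return K.dot(x, kernel)

# in call(), `ait = K.dot(uit, self.u)` would become:
# ait = dot_product(uit, self.u)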