Last active: December 25, 2019 01:35
Convolution Layer from https://arxiv.org/pdf/1710.00519.pdf
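A rough per-sample sketch of what the layer below computes, in my own notation (the equation numbers are simply the ones the code comments cite from the paper, not verified against it): given a text matrix T and a context matrix C, each of shape (num_words, em_dim),

\[
E = T\,W_e\,C^{\top} \quad (\text{scores, eq. 2}), \qquad
A = \operatorname{softmax}(E), \qquad
R = A\,C \quad (\text{attentive context, eq. 4}), \qquad
o_i = W_2\, r_i \quad (\text{weighting, eq. 6}),
\]

so the output has the same shape as the text input.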
import tensorflow as tf
from keras import activations
from keras import backend as K
from keras.layers import Layer  # older Keras versions expose this as keras.engine.topology.Layer


class AttentiveConv(Layer):
    """Attentive convolution layer after https://arxiv.org/pdf/1710.00519.pdf.

    Expects a list of two tensors [text, context], both of shape
    (batch_size, num_words, em_dim), and returns an attentively weighted
    context representation with the same shape as `text`.
    """

    def __init__(self, kernel_activation='tanh', filters=3, **kwargs):
        super(AttentiveConv, self).__init__(**kwargs)
        self.kernel_activation = activations.get(kernel_activation)
        # the filter width has to be odd
        if filters % 2 == 0:
            self.filters = filters - 1
        else:
            self.filters = filters
        K.set_floatx('float32')

    def build(self, input_shape):
        # input_shape is a list of two shapes: [text_shape, context_shape]
        self.num_words = input_shape[0][1]
        self.em_dim = input_shape[0][2]
        # W2 weights the attentive context (equation 6)
        self.W2 = self.add_weight(shape=(self.em_dim, self.filters * self.em_dim),
                                  dtype=K.floatx(), name='att_cont_weight',
                                  trainable=True, initializer='glorot_normal')
        # We parameterises the bilinear energy function (equation 2)
        self.We = self.add_weight(shape=(self.em_dim, self.em_dim),
                                  dtype=K.floatx(), name='window_weight',
                                  trainable=True, initializer='glorot_normal')
        super(AttentiveConv, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        # The input is a list of two tensors. As this layer computes a score for
        # every element of the first input, the output has that tensor's shape.
        return input_shape[0]

    def get_config(self):
        config = {'kernel_activation': activations.serialize(self.kernel_activation),
                  'filters': self.filters}
        base_config = super(AttentiveConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def call(self, x, mask=None):
        # x is a list of two tensors: [text, context]
        text = x[0]
        context = x[1]
        # Applies the bilinear energy function (text * We * context) and weights the
        # computed feature map as in equation 6 (W2 * c_i).
        # text and context have shape (batch_size, num_words, em_dim); here both
        # num_words and em_dim are 200. The computation is done per sample of the
        # batch via K.batch_dot, since batch_matmul is not available in TensorFlow 1.5.
        # weighted_attentive_context has the same shape as text.
        weighted_attentive_context = self._compute_attentive_context(text, context)
        return weighted_attentive_context

    def _compute_attentive_context(self, text, context):
        # Computes the context score for every word vector as in equation 2:
        # scores[b, i, j] = text[b, i] . We . context[b, j]
        temp = K.dot(text, self.We)
        scores = K.batch_dot(temp, K.permute_dimensions(context, (0, 2, 1)))
        # softmax along the word axis
        scores_softmax = activations.softmax(scores, axis=1)
        # Computes the context feature map as in equation 4.
        res = tf.matmul(scores_softmax, context)
        # Weights the output as in equation 6 (W2 * c_i).
        res = K.permute_dimensions(K.dot(self.W2, K.permute_dimensions(res, (0, 2, 1))), (1, 2, 0))
        return res
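A minimal usage sketch, assuming a Keras 2.x functional model; the gist itself does not show how the layer is wired up, so the downstream head, optimizer and loss are placeholders, and the (200, 200) shapes follow the comment in call(). filters=1 keeps W2 square (em_dim x em_dim), which is what the final K.dot in _compute_attentive_context expects.

from keras.layers import Input, Dense, GlobalMaxPooling1D
from keras.models import Model

# two inputs: the text to encode and the context it attends over,
# both sequences of 200 words with 200-dimensional embeddings
text_in = Input(shape=(200, 200), name='text')
context_in = Input(shape=(200, 200), name='context')

# attentively weighted context, same shape as text_in
attended = AttentiveConv(filters=1)([text_in, context_in])

# hypothetical classification head, purely for illustration
pooled = GlobalMaxPooling1D()(attended)
output = Dense(1, activation='sigmoid')(pooled)

model = Model(inputs=[text_in, context_in], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy')
model.summary()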