Last active
October 26, 2018 07:08
-
-
Save Lanme/a51691b60b3804e804abafde5b5a529c to your computer and use it in GitHub Desktop.
quick_code_to_keras
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# attn and capsule come from https://github.com/plantsgo
from keras import backend as K | |
from keras.layers import Layer | |
from keras import initializers, regularizers, constraints | |
def dot_product(x, kernel):
    """Multiply ``x`` by ``kernel`` so that it works on Theano and TensorFlow.

    On the TensorFlow backend the kernel is given an extra trailing axis
    before ``K.dot`` is applied, and that axis is squeezed out of the result
    afterwards. Every other backend calls ``K.dot`` directly.

    Args:
        x: input tensor.
        kernel: weight tensor.

    Returns:
        The backend tensor holding the product.
    """
    if K.backend() != 'tensorflow':
        return K.dot(x, kernel)

    kernel_column = K.expand_dims(kernel)
    product = K.dot(x, kernel_column)
    return K.squeeze(product, axis=-1)
class AttentionWithContext(Layer):
    """Attention over time steps using a learned context (query) vector.

    Supports masking. Follows Yang et al., "Hierarchical Attention Networks
    for Document Classification"
    [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf]: every time step is
    projected, passed through ``tanh`` and scored against a context vector;
    the normalised scores are used to compute a weighted sum of the inputs.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.

    How to use:
    Put it on top of an RNN layer (GRU/LSTM/SimpleRNN) created with
    ``return_sequences=True``. The dimensions are inferred from the output
    shape of the RNN.

    Note: The layer has been tested with Keras 2.0.6

    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(AttentionWithContext())
        # next add a Dense layer (for classification/regression) or whatever...

    Args:
        W_regularizer: regularizer for the projection matrix ``W``.
        u_regularizer: regularizer for the context vector ``u``.
        b_regularizer: regularizer for the bias ``b``.
        W_constraint: constraint for the projection matrix ``W``.
        u_constraint: constraint for the context vector ``u``.
        b_constraint: constraint for the bias ``b``.
        bias: whether to add a bias after the projection.
        **kwargs: forwarded to ``Layer.__init__``.
    """

    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        super(AttentionWithContext, self).__init__(**kwargs)

        # Assigned after the base constructor so that base-class
        # initialisation cannot overwrite the flag (Keras' own layers use
        # this order as well).
        self.supports_masking = True

        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.u_regularizer = regularizers.get(u_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.u_constraint = constraints.get(u_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias

    def build(self, input_shape):
        """Create the weights ``W``, optional ``b`` and ``u`` from the feature size."""
        assert len(input_shape) == 3

        feature_dim = input_shape[-1]

        # ``name`` and ``shape`` are passed by keyword: the current
        # ``add_weight`` signature takes ``name`` first, so a positional
        # shape only works through Keras' legacy-argument shim.
        self.W = self.add_weight(name='{}_W'.format(self.name),
                                 shape=(feature_dim, feature_dim),
                                 initializer=self.init,
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight(name='{}_b'.format(self.name),
                                     shape=(feature_dim,),
                                     initializer='zero',
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)

        self.u = self.add_weight(name='{}_u'.format(self.name),
                                 shape=(feature_dim,),
                                 initializer=self.init,
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)

        super(AttentionWithContext, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        """Return ``None``: the mask is not passed on to the next layers."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # Apply the mask after the exp; the weights are re-normalised next.
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in Theano.
            a *= K.cast(mask, K.floatx())

        # In some cases, especially early in training, the sum may be almost
        # zero and the division yields NaNs. Adding a very small positive
        # number (epsilon) to the sum works around that.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Return ``(input_shape[0], input_shape[-1])``."""
        return input_shape[0], input_shape[-1]

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = {
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'u_regularizer': regularizers.serialize(self.u_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'u_constraint': constraints.serialize(self.u_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
        }
        base_config = super(AttentionWithContext, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://www.kaggle.com/chongjiujjin/capsule-net-with-gru
# A Capsule implementation in pure Keras
def squash(x, axis=-1):
    """Divide ``x`` by its L2 norm along ``axis``.

    The norm is computed as ``sqrt(sum(x ** 2) + epsilon)`` so the division
    stays finite for all-zero vectors.

    An earlier variant scaled by ``sqrt(n) / (0.5 + n)``, with ``n`` the
    squared norm plus epsilon. It was dropped with the remark that the
    squared norm is really small.

    Args:
        x: input tensor.
        axis: axis along which the norm is taken.

    Returns:
        Tensor with the same shape as ``x``.
    """
    norm = K.sqrt(K.sum(K.square(x), axis, keepdims=True) + K.epsilon())
    return x / norm
class Capsule(Layer):
    """Capsule layer with routing between input and output capsules.

    Args:
        num_capsule: number of output capsules.
        dim_capsule: size of each output capsule vector.
        routings: number of routing iterations; must be at least 1.
        kernel_size: stored on the layer; not used by the visible code.
        share_weights: if true, one transformation matrix is shared by all
            input capsules; otherwise each input capsule gets its own.
        activation: ``'default'`` selects ``squash``; any other value is
            wrapped in a Keras ``Activation`` layer.
        **kwargs: forwarded to ``Layer.__init__``.

    Raises:
        ValueError: if ``routings`` is smaller than 1.
    """

    def __init__(self, num_capsule, dim_capsule, routings=3, kernel_size=(9, 1), share_weights=True,
                 activation='default', **kwargs):
        super(Capsule, self).__init__(**kwargs)

        # With zero iterations ``call`` would never assign its result.
        if routings < 1:
            raise ValueError(
                'routings must be at least 1, got {}'.format(routings))

        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_size = kernel_size
        self.share_weights = share_weights

        if activation == 'default':
            self.activation = squash
        else:
            # Imported here because the file's top-level imports only bring
            # in ``Layer`` from ``keras.layers``.
            from keras.layers import Activation
            self.activation = Activation(activation)

    def build(self, input_shape):
        """Create the transformation kernel ``W``."""
        input_dim_capsule = input_shape[-1]

        # Shared weights use a single matrix (leading dimension 1);
        # otherwise there is one matrix per input capsule.
        if self.share_weights:
            kernel_count = 1
        else:
            kernel_count = input_shape[-2]

        self.W = self.add_weight(name='capsule_kernel',
                                 shape=(kernel_count,
                                        input_dim_capsule,
                                        self.num_capsule * self.dim_capsule),
                                 initializer='glorot_uniform',
                                 trainable=True)

        super(Capsule, self).build(input_shape)

    def call(self, u_vecs):
        """Transform the input capsules and route them to the output capsules."""
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        # Routing logits, shape = [None, num_capsule, input_num_capsule]
        b = K.zeros_like(u_hat_vecs[:, :, :, 0])

        for i in range(self.routings):
            # Softmax runs on the last axis, so move num_capsule there
            # (shape = [None, input_num_capsule, num_capsule]) and move it
            # back afterwards.
            c = K.softmax(K.permute_dimensions(b, (0, 2, 1)))
            c = K.permute_dimensions(c, (0, 2, 1))

            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))

            # The logits are only needed if another iteration follows.
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs

    def compute_output_shape(self, input_shape):
        """Return ``(input_shape[0], num_capsule, dim_capsule)``."""
        return (input_shape[0], self.num_capsule, self.dim_capsule)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment