Nikola Živković (NMZivkovic)
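The snippets below are excerpts from a TensorFlow 2 Transformer implementation and omit their imports; they assume roughly the following (a sketch, inferred from the classes each snippet references):

import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, Embedding, Dropout, LayerNormalization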
class DecoderLayer(Layer):
    def __init__(self, num_neurons, num_hidden_neurons, num_heads):
        super(DecoderLayer, self).__init__()

        # Build both multi-head attention blocks (masked self-attention and
        # encoder-decoder attention), each with its dropout and normalization
        self.multi_head_attention_layer1, self.attention_dropout1, self.attention_normalization1 = \
            build_multi_head_attention_layers(num_neurons, num_heads)
        self.multi_head_attention_layer2, self.attention_dropout2, self.attention_normalization2 = \
            build_multi_head_attention_layers(num_neurons, num_heads)

        # Build the feed-forward network and its dropout/normalization (mirrors EncoderLayer below)
        self.feed_forward_layer, self.feed_forward_dropout, self.feed_forward_normalization = \
            build_feed_forward_layers(num_neurons, num_hidden_neurons)
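For scale, an instantiation sketch using the hyperparameters from the original "Attention Is All You Need" paper (illustrative only, not part of the original snippet):

# Illustrative: 512-dimensional model, 2048-unit feed-forward layer, 8 attention heads
decoder_layer = DecoderLayer(num_neurons=512, num_hidden_neurons=2048, num_heads=8)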
class EncoderLayer(Layer):
    def __init__(self, num_neurons, num_hidden_neurons, num_heads):
        super(EncoderLayer, self).__init__()

        # Build multi-head attention layer and the dropout/normalization that follows it
        self.multi_head_attention_layer, self.attention_dropout, self.attention_normalization = \
            build_multi_head_attention_layers(num_neurons, num_heads)

        # Build feed-forward neural network and the dropout/normalization that follows it
        self.feed_forward_layer, self.feed_forward_dropout, self.feed_forward_normalization = \
            build_feed_forward_layers(num_neurons, num_hidden_neurons)
def build_multi_head_attention_layers(num_neurons, num_heads):
    # Attention block plus the dropout and layer normalization applied after it
    multi_head_attention_layer = MultiHeadAttentionLayer(num_neurons, num_heads)
    dropout = tf.keras.layers.Dropout(0.1)
    normalization = LayerNormalization(epsilon=1e-6)
    return multi_head_attention_layer, dropout, normalization
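A quick usage sketch of how the helper is unpacked (the variable names and sizes here are illustrative):

# One attention block plus its post-attention dropout and normalization
attention, dropout, normalization = build_multi_head_attention_layers(num_neurons=512, num_heads=8)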
def build_feed_forward_layers(num_neurons, num_hidden_neurons):
    # Two-layer feed-forward network plus the dropout and normalization applied after it
    feed_forward_layer = tf.keras.Sequential()
    feed_forward_layer.add(Dense(num_hidden_neurons, activation='relu'))
    feed_forward_layer.add(Dense(num_neurons))
    dropout = tf.keras.layers.Dropout(0.1)
    normalization = LayerNormalization(epsilon=1e-6)
    return feed_forward_layer, dropout, normalization
class PreProcessingLayer(Layer):
    def __init__(self, num_neurons, vocabular_size):
        super(PreProcessingLayer, self).__init__()

        # Initialize
        self.num_neurons = num_neurons

        # Add embeddings and positional encoding
        self.embedding = Embedding(vocabular_size, self.num_neurons)
        positional_encoding_handler = PositionalEncoding(vocabular_size, self.num_neurons)
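The PositionalEncoding class referenced above is not included in this snippet. Below is a minimal sketch of the standard sinusoidal encoding from "Attention Is All You Need"; the constructor arguments mirror the call above, but the internals and the get_positional_encoding accessor are assumptions, not the original implementation.

import numpy as np
import tensorflow as tf

class PositionalEncoding:
    # Sketch of the sinusoidal encoding:
    # PE(pos, 2i) = sin(pos / 10000^(2i/d)), PE(pos, 2i+1) = cos(pos / 10000^(2i/d))
    def __init__(self, max_position, num_neurons):
        positions = np.arange(max_position)[:, np.newaxis]   # (max_position, 1)
        dimensions = np.arange(num_neurons)[np.newaxis, :]   # (1, num_neurons)
        angles = positions / np.power(10000, (2 * (dimensions // 2)) / np.float32(num_neurons))
        angles[:, 0::2] = np.sin(angles[:, 0::2])             # sine on even indices
        angles[:, 1::2] = np.cos(angles[:, 1::2])             # cosine on odd indices
        self._encoding = tf.cast(angles[np.newaxis, ...], tf.float32)  # (1, max_position, num_neurons)

    def get_positional_encoding(self):
        return self._encoding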
import keras.layers as L
import keras.models as M

my_input = L.Input(shape=(100,))
intermediate = L.Dense(10, activation='relu')(my_input)
# A single-unit softmax always outputs 1.0; use sigmoid for a binary output
my_output = L.Dense(1, activation='sigmoid')(intermediate)
# Model takes the plural keyword arguments inputs/outputs
model = M.Model(inputs=my_input, outputs=my_output)
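A hedged follow-up, assuming a binary-classification setup for the single-unit output (the optimizer, loss, and metric choices are assumptions, not part of the original snippet):

# Sketch only: compile and inspect the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()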
class MultiHeadAttentionLayer(Layer):
    def __init__(self, num_neurons, num_heads):
        super(MultiHeadAttentionLayer, self).__init__()

        self.num_heads = num_heads
        self.num_neurons = num_neurons
        # Each head works on a slice of size num_neurons / num_heads
        self.depth = num_neurons // self.num_heads
        self.attention_layer = ScaledDotProductAttentionLayer()

        # Linear projections for queries, keys, and values, plus the final output projection
        self.q_layer = Dense(num_neurons)
        self.k_layer = Dense(num_neurons)
        self.v_layer = Dense(num_neurons)
        self.linear_layer = Dense(num_neurons)
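The constructor above only builds the projection layers; a full multi-head attention layer also reshapes the projected tensors so that each head attends over its own slice. A minimal sketch of that step inside MultiHeadAttentionLayer (the method name split_heads is an assumption, not part of the original snippet):

    # Sketch: reshape (batch_size, seq_len, num_neurons) into (batch_size, num_heads, seq_len, depth)
    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])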
class ScaledDotProductAttentionLayer():
    def calculate_output_weights(self, q, k, v, mask):
        # Score each query against every key
        qk = tf.matmul(q, k, transpose_b=True)

        # Scale by the square root of the key dimension to keep the softmax well-behaved
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_attention = qk / tf.math.sqrt(dk)

        # Push masked positions towards -infinity so softmax gives them near-zero weight
        if mask is not None:
            scaled_attention += (mask * -1e9)

        weights = tf.nn.softmax(scaled_attention, axis=-1)
        output = tf.matmul(weights, v)
        return output, weights
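A small self-contained check of the attention math (relies on the (output, weights) return of the completed method above; shapes and values are purely illustrative):

# Illustrative call with tiny tensors: one query attending over three key/value pairs
attention = ScaledDotProductAttentionLayer()
q = tf.constant([[[0.0, 1.0]]])                           # shape (1, 1, 2)
k = tf.constant([[[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]])   # shape (1, 3, 2)
v = tf.constant([[[1.0], [2.0], [3.0]]])                  # shape (1, 3, 1)
output, weights = attention.calculate_output_weights(q, k, v, mask=None)
print(weights)  # attention distribution over the three positions
print(output)   # weighted combination of the values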
maskHandler = MaskHandler()

# Padding mask: marks the zero-padded positions in each sequence
x = tf.constant([[1, 2, 0, 0, 6], [1, 1, 1, 0, 0], [0, 0, 0, 6, 9]])
mask = maskHandler.padding_mask(x)
print("Padding Mask Example:")
print("-----------")
print(mask)
print("-----------")

# Look-ahead mask: prevents a position from attending to later positions
x = tf.random.uniform((1, 3))
mask = maskHandler.look_ahead_mask(x.shape[1])
print("Look-ahead Mask Example:")
print("-----------")
print(mask)
class MaskHandler(object):
    def padding_mask(self, sequence):
        # Mark padded (zero) positions with 1.0 and add broadcast dimensions for the attention heads
        sequence = tf.cast(tf.math.equal(sequence, 0), tf.float32)
        return sequence[:, tf.newaxis, tf.newaxis, :]

    def look_ahead_mask(self, size):
        # Ones above the diagonal: future positions are masked out
        mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
        return mask
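For reference, the look-ahead mask is an upper-triangular matrix with ones above the diagonal; a quick sketch of what a size-3 call produces:

# Illustrative: position i can only attend to positions <= i
maskHandler = MaskHandler()
print(maskHandler.look_ahead_mask(3))
# Expected values:
# [[0., 1., 1.],
#  [0., 0., 1.],
#  [0., 0., 0.]]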