Rokas Liuberskis (pythonlessons)

@pythonlessons
pythonlessons / transformer_attention_3.py
Created August 16, 2023 12:45
transformer_attention
class GlobalSelfAttention(BaseAttention):
    """
    A class that implements the global self-attention layer by inheriting from the BaseAttention class.
    This layer processes a single sequence and attends to all the tokens in that sequence.

    Methods:
        call: Performs the forward pass of the layer.

    Attributes:
        mha (tf.keras.layers.MultiHeadAttention): The MultiHeadAttention layer.
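The preview cuts off before the method bodies. A minimal sketch of how such a global self-attention call method is typically written (assuming BaseAttention exposes self.mha, self.add and self.layernorm, as its docstring in transformer_attention_0.py below describes):

    def call(self, x):
        # Self-attention: the sequence attends to itself, so query, key and value are all x.
        attn_output = self.mha(query=x, value=x, key=x)
        # Residual connection followed by layer normalization.
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x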
@pythonlessons
pythonlessons / transformer_attention_2.py
Created August 16, 2023 12:45
transformer_attention
encoder_vocab_size = 1000
decoder_vocab_size = 1100
d_model = 512
encoder_embedding_layer = PositionalEmbedding(encoder_vocab_size, d_model)
decoder_embedding_layer = PositionalEmbedding(decoder_vocab_size, d_model)
random_encoder_input = np.random.randint(0, encoder_vocab_size, size=(1, 100))
random_decoder_input = np.random.randint(0, decoder_vocab_size, size=(1, 110))
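The preview stops before the layers are applied. A likely continuation, shown here only as a sketch, runs the random token IDs through the two embedding layers and checks the output shapes:

encoder_embeddings = encoder_embedding_layer(random_encoder_input)
decoder_embeddings = decoder_embedding_layer(random_decoder_input)
# Expected shapes: (1, 100, 512) for the encoder and (1, 110, 512) for the decoder.
print("encoder_embeddings shape", encoder_embeddings.shape)
print("decoder_embeddings shape", decoder_embeddings.shape)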
@pythonlessons
pythonlessons / transformer_attention_1.py
Created August 16, 2023 12:45
transformer_attention
class CrossAttention(BaseAttention):
    """
    A class that implements the cross-attention layer by inheriting from the BaseAttention class.
    This layer processes two different sequences: it attends to the context sequence while processing the query sequence.

    Methods:
        call: Performs the forward pass of the layer.

    Attributes:
        mha (tf.keras.layers.MultiHeadAttention): The MultiHeadAttention layer.
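Again the preview ends at the docstring. A sketch of the forward pass under the same assumptions (x is the query sequence, context is the sequence being attended to; caching the attention scores is a common Keras tutorial convention and an assumption here, not something the gist confirms):

    def call(self, x, context):
        # Cross-attention: queries come from x, keys and values come from the context sequence.
        attn_output, attn_scores = self.mha(
            query=x, key=context, value=context,
            return_attention_scores=True)
        # Cache the scores for later inspection or plotting (assumed convention).
        self.last_attn_scores = attn_scores
        # Residual connection followed by layer normalization.
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x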
@pythonlessons
pythonlessons / transformer_attention_0.py
Created August 16, 2023 12:45
transformer_attention
class BaseAttention(tf.keras.layers.Layer):
    """
    Base class for all attention layers; it contains the functionality common to all of them.
    This layer contains a MultiHeadAttention layer, a LayerNormalization layer and an Add layer.
    It is used as a base class for the GlobalSelfAttention, CausalSelfAttention and CrossAttention layers
    and is not intended to be used directly.

    Methods:
        call: Performs the forward pass of the layer.
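The constructor is not shown in the preview. Based on the docstring above (a MultiHeadAttention, a LayerNormalization and an Add layer), it typically looks like the following; treat it as a sketch rather than the gist's exact code:

    def __init__(self, **kwargs):
        super().__init__()
        # kwargs (e.g. num_heads, key_dim, dropout) are forwarded to MultiHeadAttention.
        self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()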
@pythonlessons
pythonlessons / transformers_introduction_5.css
Created August 10, 2023 13:19
transformers_introduction
random_input shape (1, 100)
PositionalEmbedding output (1, 100, 512)
@pythonlessons
pythonlessons / transformers_introduction_4.py
Created August 10, 2023 13:19
transformers_introduction
vocab_size = 1000
d_model = 512
embedding_layer = PositionalEmbedding(vocab_size, d_model)
random_input = np.random.randint(0, vocab_size, size=(1, 100))
output = embedding_layer(random_input)
print("random_input shape", random_input.shape)
print("PositionalEmbedding output", output.shape)
@pythonlessons
pythonlessons / transformers_introduction_3.py
Created August 10, 2023 13:19
transformers_introduction
class PositionalEmbedding(tf.keras.layers.Layer):
    """
    A positional embedding layer combines the input embedding with a positional encoding that helps the Transformer
    understand the relative position of the input tokens. This layer takes the input tokens, converts them
    into a sequence of embedding vectors, and then adds the positional encoding to those embeddings.

    Methods:
        compute_mask: Computes the mask to be applied to the embeddings.
        call: Performs the forward pass of the layer.
    """
@pythonlessons
pythonlessons / transformers_introduction_2.py
Created August 10, 2023 13:19
transformers_introduction
pos_encoding /= tf.norm(pos_encoding, axis=1, keepdims=True)
p = pos_encoding[1000]
dots = tf.einsum('pd,d->p', pos_encoding, p).numpy()
plt.subplot(2, 1, 1)
plt.plot(dots)
plt.ylim([0, 1])
plt.plot([950, 950, float('nan'), 1050, 1050], [0, 1, float('nan'), 0, 1], color='k', label='Zoom')
plt.legend()
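The snippet sets up a two-row figure but the preview ends after the first subplot. The zoomed-in second panel presumably continues along these lines (a sketch, not the gist's exact code):

plt.subplot(2, 1, 2)
plt.plot(dots)
# Zoom into the region marked by the 'Zoom' lines in the first panel.
plt.xlim([950, 1050])
plt.ylim([0, 1])
plt.show()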
@pythonlessons
pythonlessons / transformers_introduction_1.py
Created August 10, 2023 13:19
transformers_introduction
import matplotlib.pyplot as plt
pos_encoding = positional_encoding(length=2048, depth=512)
# Check the shape.
print(pos_encoding.shape)
# Plot the dimensions.
plt.pcolormesh(pos_encoding.numpy().T, cmap='RdBu')
plt.ylabel('Depth')
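The preview stops mid-plot; the remaining lines are presumably just the x-axis label, color bar and show call:

plt.xlabel('Position')
plt.colorbar()
plt.show()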
@pythonlessons
pythonlessons / transformers_introduction_0.py
Created August 10, 2023 13:19
transformers_introduction
import numpy as np
import tensorflow as tf

# Allow TensorFlow to grow GPU memory usage instead of reserving it all up front.
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

def positional_encoding(length: int, depth: int):
    """
    Generates a positional encoding for a given length and depth.
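The function body is cut off in the preview. The usual sine/cosine formulation (as in the TensorFlow Transformer tutorial) is sketched below; the gist's exact implementation may differ:

    depth = depth / 2
    positions = np.arange(length)[:, np.newaxis]       # (length, 1)
    depths = np.arange(depth)[np.newaxis, :] / depth   # (1, depth/2)
    angle_rates = 1 / (10000**depths)                  # (1, depth/2)
    angle_rads = positions * angle_rates               # (length, depth/2)
    # Concatenate sines and cosines along the feature axis to get shape (length, depth).
    pos_encoding = np.concatenate([np.sin(angle_rads), np.cos(angle_rads)], axis=-1)
    return tf.cast(pos_encoding, dtype=tf.float32)

Called as positional_encoding(length=2048, depth=512) in transformers_introduction_1.py, this returns a (2048, 512) tensor.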