Rokas Liuberskis (pythonlessons): public gists

@pythonlessons
pythonlessons / build_transformer_0.py
Created August 22, 2023 14:47
build_transformer
class EncoderLayer(tf.keras.layers.Layer):
    """
    A single layer of the Encoder. Usually there are multiple layers stacked on top of each other.

    Methods:
        call: Performs the forward pass of the layer.

    Attributes:
        self_attention (GlobalSelfAttention): The global self-attention layer.
        ffn (FeedForward): The feed-forward layer.
    """
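The preview cuts off before the layer body. A minimal sketch of how such an encoder layer is usually wired, assuming the GlobalSelfAttention and FeedForward layers from the transformer_attention gists below; the constructor arguments are illustrative, not the gist's exact signature:

import tensorflow as tf

# Sketch only: GlobalSelfAttention and FeedForward are the custom layers from the
# transformer_attention gists; d_model, num_heads, dff, dropout_rate are assumed arguments.
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
        super().__init__()
        # Bidirectional self-attention over the encoder sequence
        self.self_attention = GlobalSelfAttention(
            num_heads=num_heads, key_dim=d_model, dropout=dropout_rate
        )
        # Position-wise feed-forward block (residual and layer norm live inside FeedForward)
        self.ffn = FeedForward(d_model, dff, dropout_rate)

    def call(self, x):
        x = self.self_attention(x)  # (batch, seq_len, d_model)
        x = self.ffn(x)             # (batch, seq_len, d_model)
        return x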
@pythonlessons
pythonlessons / transformer_attention_12.css
Created August 16, 2023 12:45
transformer_attention
encoder_embeddings shape (1, 100, 512)
feed_forward_output shape (1, 100, 512)
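These shapes look like the result of running the FeedForward layer (transformer_attention_7.py below) on the encoder embeddings; a minimal sketch of the call that would produce them, with dff=2048 assumed:

# Sketch only: FeedForward and encoder_embeddings come from the neighbouring gists;
# dff=2048 is an assumption.
feed_forward_layer = FeedForward(d_model=512, dff=2048)
feed_forward_output = feed_forward_layer(encoder_embeddings)
print("feed_forward_output shape", feed_forward_output.shape)  # (1, 100, 512)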
@pythonlessons
pythonlessons / transformer_attention_11.css
Created August 16, 2023 12:45
transformer_attention
decoder_embeddings shape (1, 110, 512)
causal_self_attention_output shape (1, 110, 512)
Difference between the two outputs: 0.0
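The first two lines look like a CausalSelfAttention layer applied to the decoder embeddings; the zero difference is most likely the usual causality check, verifying that outputs at early positions do not change when later tokens are appended. A sketch under those assumptions (num_heads and key_dim values are illustrative):

import tensorflow as tf

# Sketch only: CausalSelfAttention, decoder_embedding_layer and random_decoder_input
# come from the neighbouring gists; layer hyperparameters are assumptions.
causal_self_attention = CausalSelfAttention(num_heads=2, key_dim=512)
causal_self_attention_output = causal_self_attention(decoder_embeddings)
print("causal_self_attention_output shape", causal_self_attention_output.shape)  # (1, 110, 512)

# Causality check: the first 50 output positions must be identical whether or not
# the remaining tokens are present in the input.
out_prefix = causal_self_attention(decoder_embedding_layer(random_decoder_input[:, :50]))
out_full = causal_self_attention(decoder_embeddings)[:, :50]
print("Difference between the two outputs:", tf.reduce_max(tf.abs(out_prefix - out_full)).numpy())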
@pythonlessons
pythonlessons / transformer_attention_10.css
Created August 16, 2023 12:45
transformer_attention
encoder_embeddings shape (1, 100, 512)
global_self_attention_output shape (1, 100, 512)
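A sketch of the call that would produce these shapes, assuming the GlobalSelfAttention layer from the same tutorial (hyperparameter values are illustrative):

# Sketch only: GlobalSelfAttention and encoder_embeddings come from the neighbouring gists.
global_self_attention = GlobalSelfAttention(num_heads=2, key_dim=512)
global_self_attention_output = global_self_attention(encoder_embeddings)
print("global_self_attention_output shape", global_self_attention_output.shape)  # (1, 100, 512)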
@pythonlessons
pythonlessons / transformer_attention_9.css
Created August 16, 2023 12:45
transformer_attention
encoder_embeddings shape (1, 100, 512)
decoder_embeddings shape (1, 110, 512)
cross_attention_output shape (1, 110, 512)
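Here the decoder embeddings attend to the encoder embeddings, which is why the output keeps the decoder length of 110. A sketch, assuming a CrossAttention layer that takes the query sequence first and the context sequence second (the layer name and call signature are assumptions):

# Sketch only: the decoder sequence is the query, the encoder sequence provides keys/values.
cross_attention = CrossAttention(num_heads=2, key_dim=512)
cross_attention_output = cross_attention(decoder_embeddings, encoder_embeddings)
print("cross_attention_output shape", cross_attention_output.shape)  # (1, 110, 512)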
@pythonlessons
pythonlessons / transformer_attention_8.py
Created August 16, 2023 12:45
transformer_attention
import numpy as np

encoder_vocab_size = 1000
d_model = 512
# PositionalEmbedding (token embedding + positional encoding) is defined earlier in the tutorial
encoder_embedding_layer = PositionalEmbedding(encoder_vocab_size, d_model)
random_encoder_input = np.random.randint(0, encoder_vocab_size, size=(1, 100))
encoder_embeddings = encoder_embedding_layer(random_encoder_input)
print("encoder_embeddings shape", encoder_embeddings.shape)  # (1, 100, 512)
@pythonlessons
pythonlessons / transformer_attention_7.py
Created August 16, 2023 12:45
transformer_attention
class FeedForward(tf.keras.layers.Layer):
    """
    A class that implements the feed-forward layer.

    Methods:
        call: Performs the forward pass of the layer.

    Attributes:
        seq (tf.keras.Sequential): The sequential layer that contains the feed-forward layers. It applies the two feed-forward layers and the dropout layer.
        add (tf.keras.layers.Add): The Add layer.
    """
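A sketch of the layer body the preview cuts off, based on the seq and add attributes named in the docstring; the final layer normalisation and the dff/dropout_rate defaults are assumptions:

import tensorflow as tf

class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, dff, dropout_rate=0.1):
        super().__init__()
        # Two dense layers (expand to dff, project back to d_model) followed by dropout
        self.seq = tf.keras.Sequential([
            tf.keras.layers.Dense(dff, activation='relu'),
            tf.keras.layers.Dense(d_model),
            tf.keras.layers.Dropout(dropout_rate),
        ])
        # Residual connection, then layer normalisation (assumed)
        self.add = tf.keras.layers.Add()
        self.layer_norm = tf.keras.layers.LayerNormalization()

    def call(self, x):
        x = self.add([x, self.seq(x)])
        return self.layer_norm(x)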
@pythonlessons
pythonlessons / transformer_attention_6.py
Created August 16, 2023 12:45
transformer_attention
import numpy as np

decoder_vocab_size = 1100
d_model = 512
decoder_embedding_layer = PositionalEmbedding(decoder_vocab_size, d_model)
random_decoder_input = np.random.randint(0, decoder_vocab_size, size=(1, 110))
decoder_embeddings = decoder_embedding_layer(random_decoder_input)
print("decoder_embeddings shape", decoder_embeddings.shape)  # (1, 110, 512)
@pythonlessons
pythonlessons / transformer_attention_5.py
Created August 16, 2023 12:45
transformer_attention
class CausalSelfAttention(BaseAttention):
    """
    Call self attention on the input sequence, ensuring that each position in the
    output depends only on previous positions (i.e. a causal model).

    Methods:
        call: Performs the forward pass of the layer.

    Attributes:
        mha (tf.keras.layers.MultiHeadAttention): The MultiHeadAttention layer.
    """
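A sketch of the BaseAttention parent and the call method the preview cuts off; the mha attribute is named in the docstring, while the Add and LayerNormalization wrappers are assumptions based on the usual residual-plus-norm pattern:

import tensorflow as tf

class BaseAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

class CausalSelfAttention(BaseAttention):
    def call(self, x):
        # use_causal_mask=True prevents position i from attending to positions > i
        attn_output = self.mha(query=x, value=x, key=x, use_causal_mask=True)
        x = self.add([x, attn_output])
        return self.layernorm(x)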
@pythonlessons
pythonlessons / transformer_attention_4.py
Created August 16, 2023 12:45
transformer_attention
import numpy as np

encoder_vocab_size = 1000
d_model = 512
# PositionalEmbedding (token embedding + positional encoding) is defined earlier in the tutorial
encoder_embedding_layer = PositionalEmbedding(encoder_vocab_size, d_model)
random_encoder_input = np.random.randint(0, encoder_vocab_size, size=(1, 100))
encoder_embeddings = encoder_embedding_layer(random_encoder_input)
print("encoder_embeddings shape", encoder_embeddings.shape)  # (1, 100, 512)