pythonlessons · August 22, 2023 14:47
diff --git a/build_transformer_6.py b/build_transformer_6.py
 class Decoder(tf.keras.layers.Layer):
    """
    A custom TensorFlow layer that implements the Decoder. This layer is mostly used in the Transformer models
    for natural language processing tasks, such as machine translation, text summarization or text classification.

    The forward pass embeds the target token IDs with a positional embedding, applies dropout, and then feeds
    the result through every decoder layer in order; each decoder layer also receives the context sequence.

    Methods:
        call: Performs the forward pass of the layer.

    Attributes:
        d_model (int): The dimensionality of the model.
        num_layers (int): The number of layers in the decoder.
        pos_embedding (PositionalEmbedding): The positional embedding layer.
        dec_layers (list): The list of decoder layers.
        dropout (tf.keras.layers.Dropout): The dropout layer.
        last_attn_scores: The ``last_attn_scores`` attribute of the final decoder layer, refreshed on every
            call. It is ``None`` until the layer has been called once.
    """
    def __init__(
        self,
        num_layers: int,
        d_model: int,
        num_heads: int,
        dff: int,
        vocab_size: int,
        dropout_rate: float = 0.1,
    ) -> None:
        """
        Constructor of the Decoder.

        Args:
            num_layers (int): The number of layers in the decoder.
            d_model (int): The dimensionality of the model.
            num_heads (int): The number of heads in the multi-head attention layer.
            dff (int): The dimensionality of the feed-forward layer.
            vocab_size (int): The size of the vocabulary.
            dropout_rate (float): The dropout rate, used both for the embedding dropout and for every decoder layer.
        """
        super().__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size, d_model=d_model)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

        # One independent DecoderLayer instance per level of the stack.
        self.dec_layers = [
            DecoderLayer(
                d_model=d_model,
                num_heads=num_heads,
                dff=dff,
                dropout_rate=dropout_rate,
            )
            for _ in range(num_layers)
        ]

        # Populated by `call`; stays None until the first forward pass.
        self.last_attn_scores = None

    def call(self, x: tf.Tensor, context: tf.Tensor) -> tf.Tensor:
        """
        The call function that performs the forward pass of the layer.

        Args:
            x (tf.Tensor): The input sequence of shape (batch_size, target_seq_len).
            context (tf.Tensor): The context sequence of shape (batch_size, input_seq_len, d_model).

        Returns:
            tf.Tensor: The output of the last decoder layer, of shape (batch_size, target_seq_len, d_model).
        """
        # `x` is token-IDs shape (batch, target_seq_len)
        x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)
        x = self.dropout(x)

        # Run the stack in order; every layer sees the same `context`.
        for decoder_layer in self.dec_layers:
            x = decoder_layer(x, context)

        # Expose the scores recorded by the final layer.
        # NOTE: indexing with [-1] requires at least one decoder layer (num_layers >= 1).
        self.last_attn_scores = self.dec_layers[-1].last_attn_scores

        # The shape of x is (batch_size, target_seq_len, d_model).
        return x
	class Decoder(tf.keras.layers.Layer):
	    """
	    A custom TensorFlow layer that implements the Decoder. This layer is mostly used in the Transformer models
	    for natural language processing tasks, such as machine translation, text summarization or text classification.

	    Methods:
	        call: Performs the forward pass of the layer.

	    Attributes:
	        d_model (int): The dimensionality of the model.
	        num_layers (int): The number of layers in the decoder.
	        pos_embedding (PositionalEmbedding): The positional embedding layer.
	        dec_layers (list): The list of decoder layers.
	        dropout (tf.keras.layers.Dropout): The dropout layer.
	        last_attn_scores: The ``last_attn_scores`` attribute of the final decoder layer, set on every call
	            (``None`` before the first call).
	    """
	    def __init__(
	        self,
	        num_layers: int,
	        d_model: int,
	        num_heads: int,
	        dff: int,
	        vocab_size: int,
	        dropout_rate: float = 0.1,
	    ) -> None:
	        """
	        Constructor of the Decoder.

	        Args:
	            num_layers (int): The number of layers in the decoder.
	            d_model (int): The dimensionality of the model.
	            num_heads (int): The number of heads in the multi-head attention layer.
	            dff (int): The dimensionality of the feed-forward layer.
	            vocab_size (int): The size of the vocabulary.
	            dropout_rate (float): The dropout rate.
	        """
	        super().__init__()

	        self.d_model = d_model
	        self.num_layers = num_layers

	        self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size, d_model=d_model)
	        self.dropout = tf.keras.layers.Dropout(dropout_rate)
	        # Build `num_layers` separate decoder layers sharing the same hyper-parameters.
	        self.dec_layers = [
	            DecoderLayer(
	                d_model=d_model,
	                num_heads=num_heads,
	                dff=dff,
	                dropout_rate=dropout_rate,
	            )
	            for _ in range(num_layers)
	        ]

	        # Filled in by `call` from the last decoder layer.
	        self.last_attn_scores = None

	    def call(self, x: tf.Tensor, context: tf.Tensor) -> tf.Tensor:
	        """
	        The call function that performs the forward pass of the layer.

	        Args:
	            x (tf.Tensor): The input sequence of shape (batch_size, target_seq_len).
	            context (tf.Tensor): The context sequence of shape (batch_size, input_seq_len, d_model).

	        Returns:
	            tf.Tensor: The decoded sequence of shape (batch_size, target_seq_len, d_model).
	        """
	        # `x` is token-IDs shape (batch, target_seq_len)
	        x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)

	        x = self.dropout(x)

	        # Pass the activations through each decoder layer in turn, together with the context.
	        for layer in self.dec_layers:
	            x = layer(x, context)

	        # Keep the scores recorded by the final decoder layer (needs at least one layer in the stack).
	        self.last_attn_scores = self.dec_layers[-1].last_attn_scores

	        # The shape of x is (batch_size, target_seq_len, d_model).
	        return x