# Spatial projection across patches; the bias is initialized to ones so the
# gate starts close to an identity mapping, as in the gMLP paper.
self.spatial_projection = layers.Dense(units=num_patches, bias_initializer="Ones")

# Transpose to [batch_size, embedding_dim, num_patches] so the Dense layer
# mixes information across the patch dimension, then transpose back.
v_channels = tf.linalg.matrix_transpose(v)
v_projected = self.spatial_projection(v_channels)
v_projected = tf.linalg.matrix_transpose(v_projected)
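
For context, here is a minimal sketch of how these fragments assemble into the full spatial gating unit, following the standard gMLP formulation; the tf.split into two halves and the elementwise gate are assumptions based on that formulation, not shown in the snippets above.

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class SpatialGatingUnit(layers.Layer):
    def __init__(self, units, *args, **kwargs):
        super(SpatialGatingUnit, self).__init__(*args, **kwargs)
        self.normalize = layers.LayerNormalization(epsilon=1e-6)
        self.spatial_projection = layers.Dense(units=units, bias_initializer="Ones")

    def call(self, x):
        # Split the channels into two halves: u passes through, v gates it.
        u, v = tf.split(x, num_or_size_splits=2, axis=2)
        v = self.normalize(v)
        v_channels = tf.linalg.matrix_transpose(v)
        v_projected = self.spatial_projection(v_channels)
        v_projected = tf.linalg.matrix_transpose(v_projected)
        return u * v_projected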

def call(self, inputs):
    x = self.normalize1(inputs)
    x_projected = self.channel_projection1(x)
    # x_spatial shape: [batch_size, num_patches, embedding_dim].
    x_spatial = self.spatial_gating_unit(x_projected)
    # x_projected shape: [batch_size, num_patches, embedding_dim].
    x_projected = self.channel_projection2(x_spatial)
    # Residual connection around the whole block.
    return x + x_projected

class gMLPLayer(layers.Layer):
    def __init__(self, num_patches, embedding_dim, dropout_rate, *args, **kwargs):
        super(gMLPLayer, self).__init__(*args, **kwargs)
        # First channel projection: expand to twice the embedding width so the
        # spatial gating unit can split the result into two halves.
        self.channel_projection1 = keras.Sequential(
            [
                layers.Dense(units=embedding_dim * 2),
                layers.ReLU(),
                layers.Dropout(rate=dropout_rate),
            ]
        )

gmlp_blocks = keras.Sequential(
    [gMLPLayer(num_patches, embedding_dim, dropout_rate) for _ in range(num_blocks)]
)
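
Putting the blocks together, a minimal end-to-end sketch; it assumes the inputs have already been split into patches and linearly embedded, and num_classes is a hypothetical parameter for a classification head, not defined in the snippets above.

inputs = layers.Input(shape=(num_patches, embedding_dim))
x = gmlp_blocks(inputs)
# Pool over the patch dimension and classify.
x = layers.GlobalAveragePooling1D()(x)
outputs = layers.Dense(num_classes, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)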

import tensorflow as tf
from datasets import load_dataset
from transformers import TFMT5ForConditionalGeneration, MT5Tokenizer, DataCollatorForSeq2Seq
from tensorflow.keras.optimizers import Adam

tokenizer = MT5Tokenizer.from_pretrained("google/mt5-small")
model = TFMT5ForConditionalGeneration.from_pretrained("google/mt5-small")

dataset = load_dataset("csv", data_files="train.csv")
dataset = dataset["train"].shuffle(seed=42)

def preprocess_function(examples):
    padding = "max_length"
    max_length = 200
    inputs = [ex for ex in examples["Text"]]
    targets = [ex for ex in examples["Expected"]]
    model_inputs = tokenizer(inputs, max_length=max_length, padding=padding, truncation=True)
    # Tokenize the targets and attach them as labels (standard seq2seq pattern).
    labels = tokenizer(text_target=targets, max_length=max_length, padding=padding, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

train_dataset = dataset.map(preprocess_function, batched=True, desc="Running tokenizer")
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=tokenizer.pad_token_id,
    pad_to_multiple_of=64,
    return_tensors="np",
)
tf_train_dataset = model.prepare_tf_dataset(
    train_dataset,
    collate_fn=data_collator,
    shuffle=True,
    batch_size=8,  # illustrative batch size; tune to your hardware
)

# No loss is passed to compile(): recent versions of transformers fall back to
# the model's internal loss computation in that case.
model.compile(optimizer=Adam(3e-5))
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
model.fit(tf_train_dataset, epochs=10, callbacks=[early_stopping])
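
After training, a hedged inference sketch; the input text, max_length, and beam settings below are illustrative assumptions, not taken from the snippets above.

sample = tokenizer("example input text", return_tensors="tf", max_length=200, truncation=True)
generated = model.generate(
    input_ids=sample["input_ids"],
    attention_mask=sample["attention_mask"],
    max_length=200,
    num_beams=4,
)
print(tokenizer.decode(generated[0], skip_special_tokens=True))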