Nikola Živković (NMZivkovic)
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import Sequential
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

for epoch in tqdm(range(20)):
    # Reset the running metrics at the start of every epoch
    train_loss.reset_states()
    train_accuracy.reset_states()

    for (batch, (input_language, target_language)) in enumerate(data_container.train_data):
        train_step(input_language, target_language)

    print('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(epoch, train_loss.result(), train_accuracy.result()))
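The loop above reads train_loss and train_accuracy, but the gists never show where they come from. A minimal sketch, assuming standard Keras metrics that get updated inside train_step:

import tensorflow as tf

# Hedged sketch, not from the original gists: running metrics reported once per epoch.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')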
train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
]

@tf.function(input_signature=train_step_signature)
def train_step(input_language, target_language):
    # Teacher forcing: the decoder sees the target shifted right by one token
    # and is trained to predict the target shifted left by one token.
    target_input = target_language[:, :-1]
    target_output = target_language[:, 1:]
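    # The gist is truncated here. What follows is a hedged sketch of the rest of
    # the training step, not the author's original code: it assumes maskHandler
    # exposes a build_masks() helper (name assumed) and that transformer, optimizer,
    # padded_loss_function, train_loss and train_accuracy are the objects defined
    # in the other snippets.
    encoder_padding_mask, look_ahead_mask, decoder_padding_mask = maskHandler.build_masks(
        input_language, target_input)

    with tf.GradientTape() as tape:
        predictions, _ = transformer(input_language, target_input, True,
                                     encoder_padding_mask, look_ahead_mask, decoder_padding_mask)
        loss = padded_loss_function(target_output, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

    train_loss(loss)
    train_accuracy(target_output, predictions)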
# Initialize helpers
data_container = DataHandler()
maskHandler = MaskHandler()

# Initialize parameters
num_layers = 4           # number of encoder and decoder layers
num_neurons = 128        # model (embedding) dimension
num_hidden_layers = 512  # size of the point-wise feed-forward hidden layer
num_heads = 8            # number of attention heads
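These hyperparameters feed straight into the Transformer class shown further down. A minimal sketch of wiring them together, assuming the vocabulary sizes are exposed by the DataHandler (the attribute names here are assumptions):

# Hedged sketch: vocabulary-size attributes on data_container are assumptions.
input_vocabular_size = data_container.input_vocabular_size
target_vocabular_size = data_container.target_vocabular_size

transformer = Transformer(num_layers, num_neurons, num_hidden_layers, num_heads,
                          input_vocabular_size, target_vocabular_size)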
from tensorflow.keras.losses import SparseCategoricalCrossentropy

loss_objective_function = SparseCategoricalCrossentropy(from_logits=True, reduction='none')

def padded_loss_function(real, prediction):
    # Ignore positions where the target token is the padding id (0)
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss = loss_objective_function(real, prediction)

    mask = tf.cast(mask, dtype=loss.dtype)
    loss *= mask
    return tf.reduce_mean(loss)
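A quick illustration of the masking behaviour, not part of the original gist: positions holding the padding token (id 0) contribute nothing to the loss.

import tensorflow as tf

real = tf.constant([[3, 1, 0, 0]], dtype=tf.int64)    # last two positions are padding
prediction = tf.random.uniform((1, 4, 10))            # random logits over a 10-token vocabulary
print(padded_loss_function(real, prediction))          # only the first two positions are counted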
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import LearningRateSchedule

class Schedule(LearningRateSchedule):
    def __init__(self, num_neurons, warmup_steps=4000):
        super(Schedule, self).__init__()

        self.num_neurons = tf.cast(num_neurons, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # Warm-up schedule from "Attention Is All You Need":
        # lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.num_neurons) * tf.math.minimum(arg1, arg2)

learning_rate = Schedule(num_neurons)
optimizer = Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
class Transformer(Model):
    def __init__(self, num_layers, num_neurons, num_hidden_neurons, num_heads, input_vocabular_size, target_vocabular_size):
        super(Transformer, self).__init__()

        self.encoder = Encoder(num_neurons, num_hidden_neurons, num_heads, input_vocabular_size, num_layers)
        self.decoder = Decoder(num_neurons, num_hidden_neurons, num_heads, target_vocabular_size, num_layers)
        self.linear_layer = Dense(target_vocabular_size)

    def call(self, transformer_input, tar, training, encoder_padding_mask, look_ahead_mask, decoder_padding_mask):
        encoder_output = self.encoder(transformer_input, training, encoder_padding_mask)
        decoder_output, attention_weights = self.decoder(tar, encoder_output, training, look_ahead_mask, decoder_padding_mask)

        # Project the decoder output to logits over the target vocabulary
        transformer_output = self.linear_layer(decoder_output)
        return transformer_output, attention_weights
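A hedged smoke test of the assembled model, not from the original gists: it reuses the hyperparameters above with arbitrary vocabulary sizes and assumes the attention layers accept None masks, which this particular implementation may not.

sample_transformer = Transformer(num_layers=4, num_neurons=128, num_hidden_neurons=512,
                                 num_heads=8, input_vocabular_size=8500, target_vocabular_size=8000)

dummy_input = tf.random.uniform((64, 38), minval=0, maxval=8500, dtype=tf.int64)
dummy_target = tf.random.uniform((64, 36), minval=0, maxval=8000, dtype=tf.int64)

output, _ = sample_transformer(dummy_input, dummy_target, training=False,
                               encoder_padding_mask=None, look_ahead_mask=None,
                               decoder_padding_mask=None)
print(output.shape)  # expected: (64, 36, 8000)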
class Decoder(Layer):
    def __init__(self, num_neurons, num_hidden_neurons, num_heads, vocabular_size, num_dec_layers=6):
        super(Decoder, self).__init__()

        self.num_dec_layers = num_dec_layers

        self.pre_processing_layer = PreProcessingLayer(num_neurons, vocabular_size)
        self.decoder_layers = [DecoderLayer(num_neurons, num_hidden_neurons, num_heads) for _ in range(num_dec_layers)]

    def call(self, sequence, encoder_output, training, look_ahead_mask, padding_mask):
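        # The gist is truncated here. A hedged sketch of the rest of call(), not the
        # author's original code: embed and positionally encode the target sequence,
        # run it through the stacked decoder layers, and collect each layer's attention
        # weights. The DecoderLayer return values and dictionary key format are assumptions.
        attention_weights = {}
        sequence = self.pre_processing_layer(sequence, training)

        for i in range(self.num_dec_layers):
            sequence, attention1, attention2 = self.decoder_layers[i](
                sequence, encoder_output, training, look_ahead_mask, padding_mask)
            attention_weights['decoder_layer{}_attention1'.format(i + 1)] = attention1
            attention_weights['decoder_layer{}_attention2'.format(i + 1)] = attention2

        return sequence, attention_weights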
class Encoder(Layer):
    def __init__(self, num_neurons, num_hidden_neurons, num_heads, vocabular_size, num_enc_layers=6):
        super(Encoder, self).__init__()

        self.num_enc_layers = num_enc_layers

        self.pre_processing_layer = PreProcessingLayer(num_neurons, vocabular_size)
        self.encoder_layers = [EncoderLayer(num_neurons, num_hidden_neurons, num_heads) for _ in range(num_enc_layers)]

    def call(self, sequence, training, mask):
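        # The gist is truncated here. A hedged sketch of the rest of call(), not the
        # author's original code: embed and positionally encode the input, then pass
        # it through the stacked encoder layers. The EncoderLayer call signature is
        # an assumption.
        sequence = self.pre_processing_layer(sequence, training)

        for i in range(self.num_enc_layers):
            sequence = self.encoder_layers[i](sequence, training, mask)

        return sequence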