Skip to content

Instantly share code, notes, and snippets.

View pythonlessons's full-sized avatar
🏠
Working from home

Rokas Liuberskis pythonlessons

🏠
Working from home
View GitHub Profile
@pythonlessons
pythonlessons / transformers_training_6.py
Created September 4, 2023 15:04
transformers_training
def read_files(path):
    """Read a UTF-8 text file and return its contents as a list of lines.

    The file is split on "\n" only, so other Unicode line separators that may
    occur inside a sentence do not create extra entries.

    Args:
        path: Location of the text file to read.

    Returns:
        A list of strings, one per line, without line terminators. An empty
        file yields an empty list.
    """
    with open(path, "r", encoding="utf-8") as f:
        lines = f.read().split("\n")

    # A file ending in "\n" produces one trailing empty string; drop only that
    # artefact. Unconditionally slicing off the last element would lose the
    # final sentence of a file that has no trailing newline.
    if lines and lines[-1] == "":
        lines.pop()
    return lines
# Load the `en` training and validation files (read in that order).
en_training_data, en_validation_data = (
    read_files(en_training_data_path),
    read_files(en_validation_data_path),
)
# Load the matching `es` training and validation files.
es_training_data, es_validation_data = (
    read_files(es_training_data_path),
    read_files(es_validation_data_path),
)
@pythonlessons
pythonlessons / transformers_training_5.py
Created September 4, 2023 15:04
transformers_training
# Dataset locations, grouped per language suffix as (training, validation).
en_training_data_path, en_validation_data_path = (
    "Datasets/en-es/opus.en-es-train.en",
    "Datasets/en-es/opus.en-es-dev.en",
)
es_training_data_path, es_validation_data_path = (
    "Datasets/en-es/opus.en-es-train.es",
    "Datasets/en-es/opus.en-es-dev.es",
)
@pythonlessons
pythonlessons / transformers_training_4.py
Created September 4, 2023 15:04
transformers_training
#train.py
from model import Transformer
from configs import ModelConfigs
# Create the configuration object from the ModelConfigs class imported above.
configs = ModelConfigs()
@pythonlessons
pythonlessons / transformers_training_3.py
Created September 4, 2023 15:04
transformers_training
#configs.py
import os
from datetime import datetime
from mltu.configs import BaseModelConfigs
class ModelConfigs(BaseModelConfigs):
    """Configuration class derived from mltu's ``BaseModelConfigs``.

    NOTE(review): this excerpt shows only the start of the constructor; any
    configuration fields are presumably set after the base-class call --
    confirm against the full configs.py.
    """

    def __init__(self):
        # Initialise the base class before anything else.
        super().__init__()
@pythonlessons
pythonlessons / transformers_training_2.py
Created September 4, 2023 15:04
transformers_training
#model.py
import tensorflow as tf
from mltu.tensorflow.transformer.layers import Encoder, Decoder
def Transformer(
input_vocab_size: int,
target_vocab_size: int,
encoder_input_size: int = None,
decoder_input_size: int = None,
@pythonlessons
pythonlessons / transformers_training_1.py
Created September 4, 2023 15:04
transformers_training
import numpy as np
import tensorflow as tf
# Best-effort: ask TensorFlow to grow GPU memory on demand for every visible
# GPU. Failure here is not fatal, so the documented errors are ignored.
try:
    for gpu in tf.config.experimental.list_physical_devices("GPU"):
        tf.config.experimental.set_memory_growth(gpu, True)
except (RuntimeError, ValueError):
    # RuntimeError: the TensorFlow runtime is already initialised.
    # ValueError: the device is not a valid physical device.
    # Unlike a bare `except:`, KeyboardInterrupt/SystemExit still propagate.
    pass
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from mltu.tensorflow.callbacks import Model2onnx, WarmupCosineDecay
from mltu.tensorflow.dataProvider import DataProvider
@pythonlessons
pythonlessons / transformers_training_0.py
Created September 4, 2023 15:04
transformers_training
import os
import requests
from tqdm import tqdm
from bs4 import BeautifulSoup
# Language pair identifier; both settings below are derived from it.
language = "en-es"
# Local folder associated with this language pair.
save_directory = f"./Datasets/{language}"
# Remote directory that contains the files to be downloaded.
url = f"https://data.statmt.org/opus-100-corpus/v1.0/supervised/{language}/"
@pythonlessons
pythonlessons / transformers_nlp_data_10.css
Created August 24, 2023 10:12
transformers_nlp_data
['fueron los asbestos aquí. ¡eso es lo que ocurrió!', 'me voy de aquí.', 'una vez, juro que cagué una barra de tiza.', 'y prefiero mudarme, ¿entiendes?']
["<start>it was the asbestos in here, that's what did it!", "<start>i'm out of here.", '<start>one time, i swear i pooped out a stick of chalk.', '<start>and i will move, do you understand me?']
["it was the asbestos in here, that's what did it!<eos>", "i'm out of here.<eos>", 'one time, i swear i pooped out a stick of chalk.<eos>', 'and i will move, do you understand me?<eos>']
@pythonlessons
pythonlessons / transformers_nlp_data_9.css
Created August 24, 2023 10:12
transformers_nlp_data
[33, 51, 48, 55, 55, 58, 3, 66, 58, 61, 55, 47, 15, 3, 51, 58, 66, 3, 44, 61, 48, 3, 68, 58, 64, 36, 32]
['<start>hello world, how are you?<eos>']
['hello world, how are you?']
@pythonlessons
pythonlessons / transformers_nlp_data_8.css
Created August 24, 2023 10:12
transformers_nlp_data
Fitting tokenizer: 100%|██████████| 995249/995249 [00:10<00:00, 95719.57it/s]
Fitting tokenizer: 100%|██████████| 995249/995249 [00:07<00:00, 134446.71it/s]