Training
import os

import torch
from torch.utils.data import DataLoader, random_split
from transformers import BertForSequenceClassification, AutoTokenizer
from transformers.optimization import AdamW
from ignite.engine import Engine, Events
from ignite.metrics import RunningAverage, Accuracy, Precision
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite.contrib.handlers import ProgressBar
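
# NOTE: the gist relies on a custom TweetDataset class that is not included in
# this file. The sketch below only illustrates what such a Dataset could look
# like, assuming the pre-processed JSON file is a list of
# {"text": ..., "label": ...} records; the field names, the max length and the
# tokenization details are assumptions, not the author's actual implementation.
import json
from torch.utils.data import Dataset


class TweetDataset(Dataset):

    def __init__(self, path, device, model_name='dbmdz/bert-base-italian-xxl-cased', max_length=128):
        with open(path) as fp:
            self.samples = json.load(fp)
        # The device is kept so callers can decide where tensors should live;
        # here the tensors are returned on the CPU and moved to the device
        # inside the train/validation loops, which plays nicely with
        # num_workers > 0 and pin_memory=True.
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.max_length = max_length

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        encoding = self.tokenizer(
            sample['text'],
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        return (
            encoding['input_ids'].squeeze(0),
            encoding['attention_mask'].squeeze(0),
            encoding['token_type_ids'].squeeze(0),
            torch.tensor(sample['label'], dtype=torch.long)
        )
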
def train(model_name, model, epochs, train_dl, test_dl, optimizer, chkpt_dir):
    # Run every batch on the same device the model lives on
    device = next(model.parameters()).device

    # Ignite's train loop
    def train_loop(engine, batch):
        # Set the model in training mode
        model.train()
        # Null the gradients
        optimizer.zero_grad()
        # Unpack the batch returned by the DataLoader and move it to the
        # model's device (a no-op if the Dataset already placed it there)
        input_ids, attention_mask, token_type_ids, labels = (t.to(device) for t in batch)
        # Compute the model outputs
        output = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=labels
        )
        # The loss is the first element of the returned tuple
        loss = output[0]
        # Back-propagate
        loss.backward()
        # Take an optimizer step towards the loss minimum
        optimizer.step()
        # Return the loss value at each iteration
        return loss.item()
    # Ignite's validation loop
    def val_loop(engine, batch):
        # Set the model in evaluation mode (disables dropout)
        model.eval()
        # Deactivate autograd
        with torch.no_grad():
            # Unpack the batch returned by the test DataLoader and move it
            # to the model's device
            input_ids, attention_mask, token_type_ids, labels = (t.to(device) for t in batch)
            # Compute the model outputs
            output = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                labels=labels
            )
            # Return the logits and the true labels; Ignite's metrics expect (y_pred, y)
            return output[1], labels
    # Define two Ignite engines, one for training and one for validation
    trainer = Engine(train_loop)
    validator = Engine(val_loop)

    # Bind accuracy & precision metrics to the validation engine
    Accuracy().attach(validator, "accuracy")
    Precision().attach(validator, "precision")

    # Use the validation accuracy as the early-stopping score.
    # Ignite's EarlyStopping treats a higher score as an improvement,
    # so the accuracy is returned as-is.
    def score_function(engine: Engine):
        # The engine passed here is the validator
        return engine.state.metrics['accuracy']

    # Define the actual early-stopping handler
    early_stopping_handler = EarlyStopping(
        patience=2,  # You might want to change this, but be careful.
        score_function=score_function,
        trainer=trainer,
    )
    # Evaluate the model after every epoch
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        validator.run(test_dl)
        print(
            f"validation epoch: {engine.state.epoch} "
            f"acc: {100 * validator.state.metrics['accuracy']} "
            f"prec: {100 * validator.state.metrics['precision']}"
        )

    # Attach a running average over the loss and a progress bar that tracks it
    RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")
    ProgressBar(persist=True).attach(trainer, metric_names=['loss'])

    # Save a checkpoint (state dict) after every epoch
    checkpoint_handler = ModelCheckpoint(
        chkpt_dir,
        'checkpoint',
        n_saved=epochs,
        save_as_state_dict=True,
        require_empty=False
    )

    # Save the final model with Hugging Face's interface
    def huggingface_save_handler(engine):
        # Create the output directory
        hf_dir = os.path.join(chkpt_dir, 'huggingface')
        os.makedirs(hf_dir, exist_ok=True)
        # Save the model & tokenizer
        model.save_pretrained(save_directory=hf_dir)
        AutoTokenizer.from_pretrained(model_name).save_pretrained(hf_dir)

    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler, {'model': model})
    trainer.add_event_handler(Events.COMPLETED, huggingface_save_handler)
    validator.add_event_handler(Events.COMPLETED, early_stopping_handler)

    # Do not run for more than 5 epochs, you'll likely overfit
    trainer.run(train_dl, max_epochs=epochs)
if __name__ == '__main__':
    # Italian pre-trained BERT model
    model_name = 'dbmdz/bert-base-italian-xxl-cased'
    model_chkpt_dir = 'path/to/your/checkpoint/dir'

    # Use the GPU when available, otherwise fall back to the CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Download the pre-trained model
    model = BertForSequenceClassification.from_pretrained(
        pretrained_model_name_or_path=model_name,
        num_labels=3,  # 3 classes: negative, neutral, positive
        output_attentions=False,
        output_hidden_states=False
    ).to(device)

    # I suggest working with small batches.
    batch_size = 32
    # The authors of BERT recommend a learning rate between 2e-5 and 5e-5
    lr = 2e-5
    # The authors of BERT recommend fine-tuning for at most 5 epochs
    epochs = 5

    # Load our pre-processed dataset
    dataset = TweetDataset('path/to/your/preprocessed/file.json', device)

    # Compute the sizes of the training & testing sets (80/20 split)
    size = len(dataset)
    tr_size = int(size * 0.8)
    ts_size = size - tr_size

    # Divide into train and test splits
    train_ds, test_ds = random_split(dataset, [tr_size, ts_size])

    # Create two separate data loaders with batches of 32 elements.
    # Shuffling is enabled and 2 worker processes fetch and pre-cache the batches.
    # Memory is pinned to speed up host-to-device tensor transfers.
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)

    # Pick the optimizer suggested by the official paper.
    # Note: this AdamW comes from the Hugging Face library (as opposed to torch.optim);
    # the 'W' stands for 'weight decay'.
    optimizer = AdamW(
        model.parameters(),
        lr=lr,  # Between 2e-5 and 5e-5 according to the authors of BERT
        eps=1e-8  # Default value. Leave it like this unless you know why you need to change it.
    )

    # Call the training procedure
    train(model_name, model, epochs, train_dl, test_dl, optimizer, model_chkpt_dir)
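
    # A sketch of how the fine-tuned model could be reloaded later from the
    # 'huggingface' sub-directory written by huggingface_save_handler (the path
    # below assumes the same model_chkpt_dir placeholder used above):
    #
    #   model = BertForSequenceClassification.from_pretrained('path/to/your/checkpoint/dir/huggingface')
    #   tokenizer = AutoTokenizer.from_pretrained('path/to/your/checkpoint/dir/huggingface')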