@ab3llini
Last active July 23, 2020 23:11
Training
import torch
from torch.utils.data import random_split, DataLoader
from transformers.optimization import AdamW
from ignite.engine import Engine, Events
from ignite.metrics import RunningAverage, Accuracy, Precision
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite.contrib.handlers import ProgressBar
from transformers import BertForSequenceClassification, AutoTokenizer
import os


def train(model_name, model, epochs, train_dl, test_dl, optimizer, chkpt_dir):

    # Ignite's train loop
    def train_loop(engine, batch):
        # Set the model in training mode
        model.train()
        # Zero the gradients
        optimizer.zero_grad()
        # Unpack the batch returned by the DataLoader
        input_ids, attention_mask, token_type_ids, labels = batch
        # Compute model outputs
        output = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=labels
        )
        # The loss is the first element in the returned tuple
        loss = output[0]
        # Back-propagate
        loss.backward()
        # Take an optimizer step towards the loss minimum
        optimizer.step()
        # Return the loss value at each iteration
        return loss.item()

    # Ignite's validation loop
    def val_loop(engine, batch):
        # Set the model in evaluation mode (disables dropout)
        model.eval()
        # Deactivate autograd
        with torch.no_grad():
            # Unpack the batch returned by the test DataLoader
            input_ids, attention_mask, token_type_ids, labels = batch
            # Compute model outputs
            output = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                labels=labels
            )
            # Return the predicted logits and the true labels
            return output[1], labels

    # Define two Ignite engines, one for training and one for validation
    trainer = Engine(train_loop)
    validator = Engine(val_loop)

    # Bind accuracy & precision metrics to the validation engine
    Accuracy().attach(validator, "accuracy")
    Precision().attach(validator, "precision")

    # As early stopping score function we use the validation accuracy.
    # Ignite's EarlyStopping treats a higher score as better, so return the accuracy as-is.
    def score_function(engine: Engine):
        # The engine passed here is the validator
        val_accuracy = engine.state.metrics['accuracy']
        return val_accuracy

    # Define the actual early stopping handler
    early_stopping_handler = EarlyStopping(
        patience=2,  # You might want to change this, but be careful
        score_function=score_function,
        trainer=trainer,
    )

    # Evaluate the model after every epoch
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        validator.run(test_dl)
        print(
            f"validation epoch: {engine.state.epoch} "
            f"acc: {100 * validator.state.metrics['accuracy']} "
            f"prec: {100 * validator.state.metrics['precision']}"
        )

    # Add a running average over the loss and a progress bar over the number of iterations
    RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")
    ProgressBar(persist=True).attach(trainer, metric_names=['loss'])

    # Save checkpoints and config
    checkpoint_handler = ModelCheckpoint(
        chkpt_dir,
        'checkpoint',
        n_saved=epochs,
        save_as_state_dict=True,
        require_empty=False
    )

    # Save the model with Hugging Face's interface
    # (an example of reloading this checkpoint follows the script)
    def huggingface_save_handler(engine):
        # Create the output directory (skip the error if it already exists)
        os.makedirs(os.path.join(chkpt_dir, 'huggingface'), exist_ok=True)
        # Save model & tokenizer
        model.save_pretrained(save_directory=os.path.join(chkpt_dir, 'huggingface'))
        AutoTokenizer.from_pretrained(model_name).save_pretrained(os.path.join(chkpt_dir, 'huggingface'))

    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler, {'model': model})
    trainer.add_event_handler(Events.COMPLETED, huggingface_save_handler)
    validator.add_event_handler(Events.COMPLETED, early_stopping_handler)

    # Do not run for more than 5 epochs, you risk overfitting
    trainer.run(train_dl, max_epochs=epochs)


if __name__ == '__main__':
    # Italian pre-trained BERT model
    model_name = 'dbmdz/bert-base-italian-xxl-cased'
    model_chkpt_dir = 'path/to/your/checkpoint/dir'
    # If you don't have an NVIDIA GPU, use 'cpu'
    device = torch.device('cuda')
    # Download the pre-trained model
    model = BertForSequenceClassification.from_pretrained(
        pretrained_model_name_or_path=model_name,
        num_labels=3,  # 3 classes: negative, neutral, positive
        output_attentions=False,
        output_hidden_states=False
    ).to(device)
    # I suggest working with small batches
    batch_size = 32
    # The authors of BERT tell us to use a learning rate between 2e-5 and 5e-5
    lr = 2e-5
    # The authors of BERT tell us to fine-tune the model for at most 5 epochs
    epochs = 5
    # Load our pre-processed dataset (TweetDataset comes from the pre-processing step;
    # a sketch of the interface it must expose follows the script)
    dataset = TweetDataset('path/to/your/preprocessed/file.json', device)
    # Compute the sizes of the training & testing sets
    size = len(dataset)
    tr_size = int(size * 0.8)
    ts_size = size - tr_size
    # Split into train and test sets
    train_ds, test_ds = random_split(dataset, [tr_size, ts_size])
    # Create two separate data loaders with batches of 32 elements.
    # Shuffling is enabled and 2 workers take care of fetching and pre-caching our batches.
    # Memory is pinned to speed up host-to-device tensor transfers.
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
    # Pick the optimizer according to the official paper.
    # Note: AdamW is a class from the Hugging Face library (as opposed to PyTorch);
    # the 'W' stands for 'weight decay'.
    optimizer = AdamW(
        model.parameters(),
        lr=lr,  # Between 2e-5 and 5e-5 according to the authors of BERT
        eps=1e-8  # Default value. Leave it like this unless you know why you need to change it.
    )
    # Call the training procedure
    train(model_name, model, epochs, train_dl, test_dl, optimizer, model_chkpt_dir)
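
The script above expects a TweetDataset produced by the pre-processing step, which is not shown here. What the training and validation loops actually rely on is that each batch unpacks into input_ids, attention_mask, token_type_ids and labels tensors that are already on the target device (neither loop calls .to(device)). Below is a minimal sketch of that interface, assuming a pre-processed JSON file of records with hypothetical "text" and "label" fields; it is not the author's actual class.

import json

import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer


class TweetDataset(Dataset):
    """Illustrative sketch of the dataset interface the training script assumes."""

    def __init__(self, path, device, model_name='dbmdz/bert-base-italian-xxl-cased', max_len=128):
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        with open(path) as fp:
            # Hypothetical format: [{"text": "...", "label": 0/1/2}, ...]
            records = json.load(fp)
        encoded = tokenizer(
            [r['text'] for r in records],
            padding='max_length',
            truncation=True,
            max_length=max_len,
            return_tensors='pt'
        )
        # Move everything to the target device once, so the loops need no further transfer
        self.input_ids = encoded['input_ids'].to(device)
        self.attention_mask = encoded['attention_mask'].to(device)
        self.token_type_ids = encoded['token_type_ids'].to(device)
        self.labels = torch.tensor([r['label'] for r in records], device=device)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # The order must match the unpacking done in train_loop / val_loop
        return (
            self.input_ids[idx],
            self.attention_mask[idx],
            self.token_type_ids[idx],
            self.labels[idx],
        )

Note that if the dataset keeps its tensors on the GPU like this sketch does, the DataLoader should be created with pin_memory=False and num_workers=0, since memory pinning and worker subprocesses only apply to CPU tensors.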
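
Once training completes, huggingface_save_handler leaves a standard Hugging Face checkpoint under <chkpt_dir>/huggingface, so the fine-tuned classifier can be reloaded for inference without any Ignite code. A minimal sketch, assuming the label order negative/neutral/positive implied by the num_labels=3 comment and the placeholder checkpoint path used above:

import os

import torch
from transformers import AutoTokenizer, BertForSequenceClassification

# Directory written by huggingface_save_handler (adjust to your model_chkpt_dir)
save_dir = os.path.join('path/to/your/checkpoint/dir', 'huggingface')

# Reload the fine-tuned model and its tokenizer from disk
tokenizer = AutoTokenizer.from_pretrained(save_dir)
model = BertForSequenceClassification.from_pretrained(save_dir)
model.eval()

# Classify a new tweet
inputs = tokenizer("Che bella giornata!", return_tensors='pt')
with torch.no_grad():
    logits = model(**inputs)[0]

# Map the winning logit back to a sentiment (assumed label order: negative, neutral, positive)
labels = ['negative', 'neutral', 'positive']
print(labels[int(torch.argmax(logits, dim=1))])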