Training
import os

import torch
from torch.utils.data import DataLoader, random_split
from transformers import BertForSequenceClassification, AutoTokenizer
from transformers.optimization import AdamW
from ignite.engine import Engine, Events
from ignite.metrics import RunningAverage, Accuracy, Precision
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite.contrib.handlers import ProgressBar
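
# NOTE: the gist relies on a custom TweetDataset class that is not included in
# this file. The sketch below only illustrates what such a Dataset could look
# like, assuming the pre-processed JSON file is a list of
# {"text": ..., "label": ...} records; the field names, the max length and the
# tokenization details are assumptions, not the author's actual implementation.
import json
from torch.utils.data import Dataset


class TweetDataset(Dataset):

    def __init__(self, path, device, model_name='dbmdz/bert-base-italian-xxl-cased', max_length=128):
        with open(path) as fp:
            self.samples = json.load(fp)
        # The device is kept so callers can decide where tensors should live;
        # here the tensors are returned on the CPU and moved to the device
        # inside the train/validation loops, which plays nicely with
        # num_workers > 0 and pin_memory=True.
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.max_length = max_length

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        encoding = self.tokenizer(
            sample['text'],
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        return (
            encoding['input_ids'].squeeze(0),
            encoding['attention_mask'].squeeze(0),
            encoding['token_type_ids'].squeeze(0),
            torch.tensor(sample['label'], dtype=torch.long)
        )
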
def train(model_name, model, epochs, train_dl, test_dl, optimizer, chkpt_dir):
    # Run every batch on the same device the model lives on
    device = next(model.parameters()).device

    # Ignite's train loop
    def train_loop(engine, batch):
        # Set the model in training mode
        model.train()
        # Null the gradients
        optimizer.zero_grad()
        # Unpack the batch returned by the DataLoader and move it to the
        # model's device (a no-op if the Dataset already placed it there)
        input_ids, attention_mask, token_type_ids, labels = (t.to(device) for t in batch)
        # Compute the model outputs
        output = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=labels
        )
        # The loss is the first element of the returned tuple
        loss = output[0]
        # Back-propagate
        loss.backward()
        # Take an optimizer step towards the loss minimum
        optimizer.step()
        # Return the loss value at each iteration
        return loss.item()
    # Ignite's validation loop
    def val_loop(engine, batch):
        # Set the model in evaluation mode (disables dropout)
        model.eval()
        # Deactivate autograd
        with torch.no_grad():
            # Unpack the batch returned by the test DataLoader and move it
            # to the model's device
            input_ids, attention_mask, token_type_ids, labels = (t.to(device) for t in batch)
            # Compute the model outputs
            output = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                labels=labels
            )
            # Return the logits and the true labels; Ignite's metrics expect (y_pred, y)
            return output[1], labels
    # Define two Ignite engines, one for training and one for validation
    trainer = Engine(train_loop)
    validator = Engine(val_loop)

    # Bind accuracy & precision metrics to the validation engine
    Accuracy().attach(validator, "accuracy")
    Precision().attach(validator, "precision")

    # Use the validation accuracy as the early-stopping score.
    # Ignite's EarlyStopping treats a higher score as an improvement,
    # so the accuracy is returned as-is.
    def score_function(engine: Engine):
        # The engine passed here is the validator
        return engine.state.metrics['accuracy']

    # Define the actual early-stopping handler
    early_stopping_handler = EarlyStopping(
        patience=2,  # You might want to change this, but be careful.
        score_function=score_function,
        trainer=trainer,
    )
    # Evaluate the model after every epoch
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        validator.run(test_dl)
        print(
            f"validation epoch: {engine.state.epoch} "
            f"acc: {100 * validator.state.metrics['accuracy']} "
            f"prec: {100 * validator.state.metrics['precision']}"
        )

    # Attach a running average over the loss and a progress bar that tracks it
    RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")
    ProgressBar(persist=True).attach(trainer, metric_names=['loss'])

    # Save a checkpoint (state dict) after every epoch
    checkpoint_handler = ModelCheckpoint(
        chkpt_dir,
        'checkpoint',
        n_saved=epochs,
        save_as_state_dict=True,
        require_empty=False
    )

    # Save the final model with Hugging Face's interface
    def huggingface_save_handler(engine):
        # Create the output directory
        hf_dir = os.path.join(chkpt_dir, 'huggingface')
        os.makedirs(hf_dir, exist_ok=True)
        # Save the model & tokenizer
        model.save_pretrained(save_directory=hf_dir)
        AutoTokenizer.from_pretrained(model_name).save_pretrained(hf_dir)

    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler, {'model': model})
    trainer.add_event_handler(Events.COMPLETED, huggingface_save_handler)
    validator.add_event_handler(Events.COMPLETED, early_stopping_handler)

    # Do not run for more than 5 epochs, you'll likely overfit
    trainer.run(train_dl, max_epochs=epochs)
if __name__ == '__main__':
    # Italian pre-trained BERT model
    model_name = 'dbmdz/bert-base-italian-xxl-cased'
    model_chkpt_dir = 'path/to/your/checkpoint/dir'

    # Use the GPU when available, otherwise fall back to the CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Download the pre-trained model
    model = BertForSequenceClassification.from_pretrained(
        pretrained_model_name_or_path=model_name,
        num_labels=3,  # 3 classes: negative, neutral, positive
        output_attentions=False,
        output_hidden_states=False
    ).to(device)

    # I suggest working with small batches.
    batch_size = 32
    # The authors of BERT recommend a learning rate between 2e-5 and 5e-5
    lr = 2e-5
    # The authors of BERT recommend fine-tuning for at most 5 epochs
    epochs = 5

    # Load our pre-processed dataset
    dataset = TweetDataset('path/to/your/preprocessed/file.json', device)

    # Compute the sizes of the training & testing sets (80/20 split)
    size = len(dataset)
    tr_size = int(size * 0.8)
    ts_size = size - tr_size

    # Divide into train and test splits
    train_ds, test_ds = random_split(dataset, [tr_size, ts_size])

    # Create two separate data loaders with batches of 32 elements.
    # Shuffling is enabled and 2 worker processes fetch and pre-cache the batches.
    # Memory is pinned to speed up host-to-device tensor transfers.
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)

    # Pick the optimizer suggested by the official paper.
    # Note: this AdamW comes from the Hugging Face library (as opposed to torch.optim);
    # the 'W' stands for 'weight decay'.
    optimizer = AdamW(
        model.parameters(),
        lr=lr,  # Between 2e-5 and 5e-5 according to the authors of BERT
        eps=1e-8  # Default value. Leave it like this unless you know why you need to change it.
    )

    # Call the training procedure
    train(model_name, model, epochs, train_dl, test_dl, optimizer, model_chkpt_dir)
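
    # A sketch of how the fine-tuned model could be reloaded later from the
    # 'huggingface' sub-directory written by huggingface_save_handler (the path
    # below assumes the same model_chkpt_dir placeholder used above):
    #
    #   model = BertForSequenceClassification.from_pretrained('path/to/your/checkpoint/dir/huggingface')
    #   tokenizer = AutoTokenizer.from_pretrained('path/to/your/checkpoint/dir/huggingface')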