Standard Fine-tuning Steps
# standard steps to follow for fine-tuning BERT
# 1. Load the pre-trained model
# 2. Tokenize the input
# 3. Convert the tokens to their index numbers in the BERT vocabulary
# 4. Set all of the model's parameter gradients to zero
# 5. Run the forward pass, calculate the loss, and perform a backward pass to calculate the gradients
# 6. Clip the gradients to 1.0. This helps prevent the exploding-gradient problem
# 7. Update the model's parameters
# 8. Update the learning rate
# 9. Clear the calculated gradients
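# --- Assumed setup (not part of the original gist) ---
# The script below also relies on `sentences`, `labels`, and `MAX_LEN` being
# defined beforehand (e.g. loaded from a training CSV); the imports and the
# small helpers here are a minimal sketch of what it needs in order to run.
import os
import json
import time
import datetime

import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt

from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
# pad_sequences comes from Keras; on newer TensorFlow it lives under
# tensorflow.keras.preprocessing.sequence instead.
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import matthews_corrcoef, accuracy_score, classification_report
from transformers import BertTokenizer, BertForSequenceClassification

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def format_time(elapsed):
    # Format a span of seconds as hh:mm:ss, rounded to whole seconds.
    return str(datetime.timedelta(seconds=int(round(elapsed))))

def flat_accuracy(preds, labels):
    # Fraction of argmax predictions that match the labels.
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)
# --- End assumed setup ---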
# 1. Load the pre-trained model
# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Load pre-trained model (weights)
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",          # Use the 12-layer BERT model, with an uncased vocab.
    num_labels = 2,               # The number of output labels--2 for binary classification.
                                  # You can increase this for multi-class tasks.
    output_attentions = False,    # Whether the model returns attention weights.
    output_hidden_states = False, # Whether the model returns all hidden states.
)
# 2. Tokenize the input
# Print the original sentence.
print(' Original: ', sentences[0])
# Print the sentence split into tokens.
print('Tokenized: ', tokenizer.tokenize(sentences[0]))
# Print the sentence mapped to token ids.
print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentences[0])))
# 3. Convert the tokens to their index numbers in the BERT vocabulary
# Mark each token as belonging to sentence "1".
segments_ids = [1] * len(tokenizer.tokenize(sentences[0]))
# Print the sentence mapped to token ids.
print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentences[0])))
# 4. Set all of the model's parameter gradients to zero
# Load pre-trained model (weights)
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels = 2,
    output_attentions = False,
    output_hidden_states = False,
)
# Tell pytorch to run this model on the GPU.
model.cuda()
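# Note: the code above only reloads the model and moves it to the GPU; the
# gradient reset described in step 4 actually happens per batch inside the
# training loop further down. A minimal illustration of the step itself:
model.zero_grad()        # zero the gradients stored on the model's parameters
# or, equivalently, once an optimizer has been created:
# optimizer.zero_grad()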
# 5. Run the forward pass, calculate the loss, and perform a backward pass to calculate the gradients
# Put the model in "evaluation" mode, meaning feed-forward operation.
model.eval()
# Copy the model to the GPU.
model.to(device)
# Tokenize all of the sentences and map the tokens to their word IDs.
input_ids = []
# For every sentence...
for sent in sentences:
    # `encode` will:
    #   (1) Tokenize the sentence.
    #   (2) Prepend the `[CLS]` token to the start.
    #   (3) Append the `[SEP]` token to the end.
    #   (4) Map tokens to their IDs.
    encoded_sent = tokenizer.encode(
        sent,                      # Sentence to encode.
        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
    )
    # Add the encoded sentence to the list.
    input_ids.append(encoded_sent)
# Print sentence 0, now as a list of IDs.
print('Original: ', sentences[0])
print('Token IDs:', input_ids[0])
# Pad our input tokens
input_ids = pad_sequences(input_ids, maxlen=MAX_LEN,
                          dtype="long", truncating="post", padding="post")
# Create attention masks
attention_masks = []
# Create a mask of 1s for each token followed by 0s for padding
for seq in input_ids:
    seq_mask = [float(i > 0) for i in seq]
    attention_masks.append(seq_mask)
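# Note (assumption): with recent versions of `transformers`, the encode /
# pad / mask steps above can be done in a single tokenizer call. A sketch:
encoded = tokenizer(
    list(sentences),
    padding='max_length',   # pad every sequence out to max_length
    truncation=True,
    max_length=MAX_LEN,
    return_tensors='pt',    # return PyTorch tensors directly
)
# encoded['input_ids'] and encoded['attention_mask'] correspond to the
# `input_ids` and `attention_masks` built manually above.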
# Use train_test_split to split our data into train and validation sets for training
# Use 90% for training and 10% for validation.
train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids, labels,
                                                                                     random_state=2018, test_size=0.1)
# Do the same for the masks.
train_masks, validation_masks, _, _ = train_test_split(attention_masks, input_ids,
                                                       random_state=2018, test_size=0.1)
# Convert all of our data into torch tensors, the required datatype for our model
train_inputs = torch.tensor(train_inputs)
validation_inputs = torch.tensor(validation_inputs)
train_labels = torch.tensor(train_labels)
validation_labels = torch.tensor(validation_labels)
train_masks = torch.tensor(train_masks)
validation_masks = torch.tensor(validation_masks)
# Select a batch size for training. For fine-tuning BERT on a specific task, the authors recommend a batch size of 16 or 32.
batch_size = 32
# Create an iterator of our data with torch DataLoader. This helps save on memory during training because, unlike a for loop,
# with an iterator the entire dataset does not need to be loaded into memory.
# The DataLoader needs to know our batch size for training, so we specify it here.
# For fine-tuning BERT on a specific task, the authors recommend between 2 and 4 epochs.
# We chose to run for 4, but we'll see later that this may be over-fitting the training data.
# Create the DataLoader for our training set.
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
# Create the DataLoader for our validation set.
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)
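# Assumed setup: the training loop below uses `optimizer`, `scheduler`, and
# `epochs`, none of which are defined in this gist. A typical configuration,
# with hyperparameters that are assumptions in line with the 2-4 epoch
# recommendation mentioned above:
from transformers import AdamW, get_linear_schedule_with_warmup

epochs = 4
optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=total_steps)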
# 6. Clip the gradients to 1.0. It helps in preventing the exploding gradient problem
# This training code is based on the `run_glue.py` script here:
#
#
# We'll store a number of quantities such as training and validation loss, validation accuracy, and timings.
training_stats = []
# Measure the total training time for the whole run.
total_t0 = time.time()
# For each epoch...
for epoch_i in range(0, epochs):
    # ========================================
    #               Training
    # ========================================
    # Perform one full pass over the training set.
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')
    # Measure how long the training epoch takes.
    t0 = time.time()
    # Reset the total loss for this epoch.
    total_train_loss = 0
    # Put the model into training mode. Don't be misled--the call to `train` just changes the *mode*, it doesn't *perform* the training.
    # `dropout` and `batchnorm` layers behave differently during training vs. test.
    model.train()
    model.to(device)
    # For each batch of training data...
    for step, batch in enumerate(train_dataloader):
        # Unpack this training batch from our dataloader.
        #
        # As we unpack the batch, we'll also copy each tensor to the GPU using the `to` method.
        #
        # `batch` contains three pytorch tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: labels
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        # Always clear any previously calculated gradients before performing a backward pass.
        model.zero_grad()
        # Perform a forward pass (evaluate the model on this training batch).
        # This will return the loss (rather than the model output) because we have provided the `labels`.
        # The documentation for this `model` function is here:
        # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
        loss, logits = model(b_input_ids,
                             token_type_ids=None,
                             attention_mask=b_input_mask,
                             labels=b_labels)
        # Accumulate the training loss over all of the batches so that we can calculate the average loss at the end.
        total_train_loss += loss.item()
        # Perform a backward pass to calculate the gradients.
        loss.backward()
        # Clip the norm of the gradients to 1.0 to prevent "exploding gradients".
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        # Update parameters and take a step using the computed gradient.
        # The optimizer dictates the "update rule"--how the parameters are modified based on their gradients, the learning rate, etc.
        optimizer.step()
        # Update the learning rate.
        scheduler.step()
    # Calculate the average loss over all of the batches.
    avg_train_loss = total_train_loss / len(train_dataloader)
    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)
    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(training_time))
    # ========================================
    #               Validation
    # ========================================
    # After the completion of each training epoch, measure our performance on our validation set.
    print("")
    print("Running Validation...")
    t0 = time.time()
    # Put the model in evaluation mode--the dropout layers behave differently during evaluation.
    model.eval()
    # Tracking variables
    total_eval_accuracy = 0
    total_eval_loss = 0
    nb_eval_steps = 0
    # Evaluate data for one epoch
    for batch in validation_dataloader:
        # Unpack this validation batch from our dataloader.
        #
        # As we unpack the batch, we'll also copy each tensor to the GPU using the `to` method.
        #
        # `batch` contains three pytorch tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: labels
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        # Tell pytorch not to bother with constructing the compute graph during the forward pass, since this is only needed for backprop (training).
        with torch.no_grad():
            # Forward pass, calculate logit predictions.
            # token_type_ids is the same as the "segment ids", which
            # differentiates sentence 1 and 2 in 2-sentence tasks.
            # The documentation for this `model` function is here:
            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
            (loss, logits) = model(b_input_ids,
                                   token_type_ids=None,
                                   attention_mask=b_input_mask,
                                   labels=b_labels)
        # Accumulate the validation loss.
        total_eval_loss += loss.item()
        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        # Calculate the accuracy for this batch of test sentences.
        total_eval_accuracy += flat_accuracy(logits, label_ids)
    # Report the final accuracy for this validation run.
    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
    print("  Accuracy: {0:.2f}".format(avg_val_accuracy))
    # Calculate the average loss over all of the batches.
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    # Measure how long the validation run took.
    validation_time = format_time(time.time() - t0)
    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))
    # Record all statistics from this epoch.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )
print("")
print("Training complete!")
print("Total training took {:} (h:mm:ss)".format(format_time(time.time() - total_t0)))
# Display floats with two decimal places.
pd.set_option('display.precision', 2)
# Create a DataFrame from our training statistics.
df_stats = pd.DataFrame(data=training_stats)
# Use the 'epoch' as the row index.
df_stats = df_stats.set_index('epoch')
# A hack to force the column headers to wrap.
#df = df.style.set_table_styles([dict(selector="th",props=[('max-width', '70px')])])
# Display the table.
print(df_stats)
# Save the trained model and the associated configuration.
# If we have a distributed model, save only the encapsulated model
# (it was wrapped in PyTorch DistributedDataParallel or DataParallel).
model_to_save = model.module if hasattr(model, 'module') else model
# If we save using the predefined names, we can load using `from_pretrained`.
output_dir = './model_save/'
# Create output directory if needed
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
print("Saving model to %s" % output_dir)
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`.
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
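# Assumption: `args` is used below (and later for `num_train_epochs` and
# `max_seq_length`) but is never constructed in this gist. It is presumably an
# argparse.Namespace as in `run_glue.py`; a minimal stand-in consistent with
# the values used elsewhere in this script:
import argparse
args = argparse.Namespace(
    num_train_epochs=epochs,   # used for the plot x-ticks
    max_seq_length=MAX_LEN,    # used when padding the prediction inputs
    batch_size=batch_size,
)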
# Good practice: save your training arguments together with the trained model.
torch.save(args, os.path.join(output_dir, 'training_args.bin'))
# Load a trained model and vocabulary that you have fine-tuned.
model = BertForSequenceClassification.from_pretrained(output_dir)
tokenizer = BertTokenizer.from_pretrained(output_dir)
# Copy the model to the GPU.
model.to(device)
# ========================================
#               Testing
# ========================================
# After training completes, measure our performance on the held-out test set.
print("")
print("Running Testing...")
t0 = time.time()
# Put the model in evaluation mode--the dropout layers behave differently during evaluation.
model.eval()
# Tracking variables
predictions, true_labels = [], []
# Predict
# (Assumes a `test_dataloader` built the same way as `validation_dataloader`.)
for batch in test_dataloader:
    # Unpack this test batch from our dataloader.
    #
    # As we unpack the batch, we'll also copy each tensor to the GPU using the `to` method.
    #
    # `batch` contains three pytorch tensors:
    #   [0]: input ids
    #   [1]: attention masks
    #   [2]: labels
    b_input_ids = batch[0].to(device)
    b_input_mask = batch[1].to(device)
    b_labels = batch[2].to(device)
    # Tell pytorch not to bother with constructing the compute graph during the forward pass, since this is only needed for backprop (training).
    with torch.no_grad():
        # Forward pass, calculate logit predictions.
        # token_type_ids is the same as the "segment ids", which
        # differentiates sentence 1 and 2 in 2-sentence tasks.
        # The documentation for this `model` function is here:
        # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
        (loss, logits) = model(b_input_ids,
                               token_type_ids=None,
                               attention_mask=b_input_mask,
                               labels=b_labels)
    # Move logits and labels to CPU
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    # Store predictions and true labels
    predictions.append(logits)
    true_labels.append(label_ids)
print('    DONE.')
print("Testing took {:}".format(format_time(time.time() - t0)))
# Combine the results across all batches.
flat_predictions = np.concatenate(predictions, axis=0)
# For each sample, pick the label (0 or 1) with the higher score.
flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
# Combine the correct labels for each batch into a single list.
flat_true_labels = np.concatenate(true_labels, axis=0)
# Calculate the Matthews correlation coefficient (MCC).
mcc = matthews_corrcoef(flat_true_labels, flat_predictions)
print('MCC: {:.3f}'.format(mcc))
# Calculate the accuracy.
accuracy = accuracy_score(flat_true_labels, flat_predictions)
print('Accuracy: {:.3f}'.format(accuracy))
# Report the number of sentences.
print('Number of test sentences: {:,}\n'.format(len(flat_true_labels)))
# Create a classification report.
report = classification_report(flat_true_labels, flat_predictions, labels=[0, 1], target_names=['negative', 'positive'], output_dict=True)
print(report)
# Save the report
with open('report.json', 'w') as f:
    json.dump(report, f)
# Save the predictions
with open('predictions.json', 'w') as f:
    json.dump(flat_predictions.tolist(), f)
# Save the true labels
with open('true_labels.json', 'w') as f:
    json.dump(flat_true_labels.tolist(), f)
# Save the MCC
with open('mcc.json', 'w') as f:
    json.dump(mcc, f)
# Save the accuracy
with open('accuracy.json', 'w') as f:
    json.dump(accuracy, f)
# Save the training stats
with open('training_stats.json', 'w') as f:
    json.dump(training_stats, f)
# Save the training arguments (vars() turns the argparse Namespace into a JSON-serializable dict)
with open('training_args.json', 'w') as f:
    json.dump(vars(args), f)
# Save the model
model.save_pretrained('model_save')
# Save the tokenizer
tokenizer.save_pretrained('model_save')
# Save the training arguments in binary form as well
torch.save(args, os.path.join('model_save', 'training_args.bin'))
# ========================================
#               Plotting
# ========================================
# Plot the training loss and accuracy curves for training and validation.
# Set the style. (On newer matplotlib this style is named 'seaborn-v0_8-whitegrid'.)
plt.style.use(['seaborn-whitegrid'])
# Increase the plot size and font size.
plt.rcParams['figure.figsize'] = [12, 4]
# Plot the learning curve.
plt.plot(df_stats['Training Loss'], 'b-o', label="Training")
plt.title("Training loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.xticks([i for i in range(1, args.num_train_epochs + 1)])
plt.show()
# Plot the learning curve.
plt.plot(df_stats['Valid. Loss'], 'b-o', label="Validation")
plt.title("Validation loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.xticks([i for i in range(1, args.num_train_epochs + 1)])
plt.show()
# Plot the learning curve.
plt.plot(df_stats['Valid. Accur.'], 'b-o', label="Validation")
plt.title("Validation accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.xticks([i for i in range(1, args.num_train_epochs + 1)])
plt.show()
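# Note: in a non-interactive run, each figure can also be written to disk by
# calling plt.savefig(...) just before its plt.show() above, e.g.
# plt.savefig('validation_accuracy.png', dpi=150, bbox_inches='tight')
# (the file name here is a placeholder).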
# ========================================
#              Prediction
# ========================================
# Load the dataset into a pandas dataframe.
df = pd.read_csv("test.csv")
# Report the number of sentences.
print('Number of test sentences: {:,}\n'.format(df.shape[0]))
# Create sentence and label lists
sentences = df.text.values
# Tokenize all of the sentences and map the tokens to their word IDs.
input_ids = []
# For every sentence...
for sent in sentences:
    # `encode` will:
    #   (1) Tokenize the sentence.
    #   (2) Prepend the `[CLS]` token to the start.
    #   (3) Append the `[SEP]` token to the end.
    #   (4) Map tokens to their IDs.
    encoded_sent = tokenizer.encode(
        sent,                      # Sentence to encode.
        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
    )
    # Add the encoded sentence to the list.
    input_ids.append(encoded_sent)
# Pad our input tokens
input_ids = pad_sequences(input_ids, maxlen=args.max_seq_length, dtype="long",
                          value=0, truncating="post", padding="post")
# Create attention masks
attention_masks = []
# Create a mask of 1s for each token followed by 0s for padding
for seq in input_ids:
    seq_mask = [float(i > 0) for i in seq]
    attention_masks.append(seq_mask)
# Convert to tensors.
prediction_inputs = torch.tensor(input_ids)
prediction_masks = torch.tensor(attention_masks)
# Set the batch size.
batch_size = 32
# Create the DataLoader.
prediction_data = TensorDataset(prediction_inputs, prediction_masks)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)
# Prediction on test set
print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs)))
# Put model in evaluation mode
model.eval()
# Tracking variables
predictions, true_labels = [], []
# Predict
for batch in prediction_dataloader:
    # Add batch to GPU
    batch = tuple(t.to(device) for t in batch)
    # Unpack the inputs from our dataloader
    b_input_ids, b_input_mask = batch
    # Telling the model not to compute or store gradients, saving memory and
    # speeding up prediction
    with torch.no_grad():
        # Forward pass, calculate logit predictions.
        # This will return the logits rather than the loss because we have
        # not provided labels.
        # token_type_ids is the same as the "segment ids", which
        # differentiates sentence 1 and 2 in 2-sentence tasks.
        # The documentation for this `model` function is here:
        # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask)
    logits = outputs[0]
    # Move logits to CPU
    logits = logits.detach().cpu().numpy()
    # Store the predictions
    predictions.append(logits)
print('    DONE.')
print('Positive samples: %d of %d (%.2f%%)' % (df.label.sum(), len(df.label), (df.label.sum() / len(df.label) * 100.0)))
# Combine the results across all batches.
flat_predictions = np.concatenate(predictions, axis=0)
# For each sample, pick the label (0 or 1) with the higher score.
flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
# Save the predictions
with open('predictions.json', 'w') as f:
    json.dump(flat_predictions.tolist(), f)