Skip to content

Instantly share code, notes, and snippets.

@remi-or
remi-or / roberta_peek.py
Created January 17, 2022 18:04
Roberta peek
# Load the pretrained "roberta-large" checkpoint through the masked-LM auto class.
from transformers import AutoModelForMaskedLM
roberta = AutoModelForMaskedLM.from_pretrained("roberta-large")
# Print the loaded model object for a quick look at it.
print(roberta)
import matplotlib.pyplot as plt
import seaborn as sns
def average_word_count(list_of_texts):
"""
Returns the average word count of a list of texts.
"""
total_count = 0
for text in list_of_texts:
text = text.replace("'", ' ')
@remi-or
remi-or / Datasets.py
Last active August 16, 2021 10:54
Snippet for loading datasets
# This snippet requires you to install Hugging Face's datasets module
from datasets import load_dataset
import pandas as pd
# Accumulator for the question texts; each row is tagged with its source dataset.
Dataframe = pd.DataFrame({})
# Keep only the first 3000 questions of the SQuAD train split.
questions = load_dataset('squad')['train']['question'][:3000]
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported equivalent and, like append, keeps each frame's own index.
Dataframe = pd.concat([Dataframe, pd.DataFrame({'Text': questions, 'Source': 'squad'})])
# Same slice for the HotpotQA 'distractor' configuration.
questions = load_dataset('hotpot_qa', 'distractor')['train']['question'][:3000]