Created March 31, 2023 11:25
Why on earth doesn't this work?
from transformers import (AutoTokenizer, AutoModelForQuestionAnswering,
                          Trainer, TrainingArguments, default_data_collator)
import datasets

# Spanish BERT (BETO) with a question-answering head
model_name = 'dccuchile/bert-base-spanish-wwm-cased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# 80/20 split of the Spanish SQuAD (squad_es) training set
train_data = datasets.load_dataset('squad_es', 'v1.1.0', split='train[:80%]')
eval_data = datasets.load_dataset('squad_es', 'v1.1.0', split='train[80%:]')

# Tokenize question/context pairs
def preprocess_data(examples):
    return tokenizer(examples['question'], examples['context'],
                     truncation=True, padding='max_length')

train_data = train_data.map(preprocess_data, batched=True)
eval_data = eval_data.map(preprocess_data, batched=True)

training_args = TrainingArguments(output_dir='./results',
                                  evaluation_strategy='epoch', learning_rate=2e-5,
                                  per_device_train_batch_size=16,
                                  per_device_eval_batch_size=64,
                                  num_train_epochs=3, weight_decay=0.01)

trainer = Trainer(model=model, args=training_args,
                  train_dataset=train_data, eval_dataset=eval_data,
                  data_collator=default_data_collator, tokenizer=tokenizer)

trainer.train()
The error I get is:
Traceback (most recent call last):
  File "/Users/sergio/Sandbox/chatGPT/torch-train.py", line 39, in <module>
    trainer.train()
  File "/Users/sergio/Library/Python/3.9/lib/python/site-packages/transformers/trainer.py", line 1633, in train
    return inner_training_loop(
  File "/Users/sergio/Library/Python/3.9/lib/python/site-packages/transformers/trainer.py", line 1902, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/Users/sergio/Library/Python/3.9/lib/python/site-packages/transformers/trainer.py", line 2645, in training_step
    loss = self.compute_loss(model, inputs)
  File "/Users/sergio/Library/Python/3.9/lib/python/site-packages/transformers/trainer.py", line 2690, in compute_loss
    raise ValueError(
ValueError: The model did not return a loss from the inputs, only the following keys: start_logits,end_logits. For reference, the inputs it received are input_ids,token_type_ids,attention_mask.