williamFalcon · January 23, 2021 08:09
diff --git a/nov_23_mnli_a.py b/nov_23_mnli_a.py
 from transformers import BertModel
 import torch.nn.functional as F


 class BertMNLIFinetuner(pl.LightningModule):
     """Pretrained BERT encoder with a linear 3-class classification head.

     The head is applied to the hidden state at sequence position 0, and the
     encoder's attention maps are returned alongside the logits.
     """

     def __init__(self):
         """Load the pretrained encoder and build the classification head."""
         super().__init__()

         # use pretrained BERT; output_attentions=True asks the encoder to
         # return its attention maps as well
         self.bert = BertModel.from_pretrained('bert-base-cased', output_attentions=True)

         # fine tuner (3 classes)
         self.num_classes = 3
         # The encoder has to be reached through `self`: a bare `bert` is not
         # a name in this scope and raises NameError on construction.
         self.W = nn.Linear(self.bert.config.hidden_size, self.num_classes)

     def forward(self, input_ids, attention_mask, token_type_ids):
         """Run the encoder and classify each sequence.

         Args:
             input_ids: forwarded to the encoder as ``input_ids``.
             attention_mask: forwarded to the encoder as ``attention_mask``.
             token_type_ids: forwarded to the encoder as ``token_type_ids``.

         Returns:
             A ``(logits, attn)`` pair: the head's output for position 0 of
             each sequence, and the third element of the encoder output
             (the attention maps, given ``output_attentions=True``).
         """
         outputs = self.bert(
             input_ids=input_ids,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
         )

         # Index by position instead of tuple-unpacking. Depending on the
         # installed transformers version the encoder returns either a plain
         # tuple or a ModelOutput; integer indexing works on both, whereas
         # unpacking a ModelOutput yields its field names, not tensors.
         h = outputs[0]
         attn = outputs[2]

         # position 0 of every sequence (presumably the [CLS] token)
         h_cls = h[:, 0]
         logits = self.W(h_cls)
         return logits, attn

     def training_step(self, batch, batch_nb):
         """Compute the cross-entropy training loss for one batch.

         Args:
             batch: a 4-item sequence
                 ``(input_ids, attention_mask, token_type_ids, label)``.
             batch_nb: presumably the batch index; not used here.

         Returns:
             A dict holding the loss under ``'loss'`` and the values to log
             under ``'log'``.
         """
         # batch
         input_ids, attention_mask, token_type_ids, label = batch

         # fwd: call the module itself (not .forward) so registered hooks
         # run; the attention maps are not needed for the loss
         y_hat, _ = self(input_ids, attention_mask, token_type_ids)

         # loss
         loss = F.cross_entropy(y_hat, label)

         # logs
         # NOTE(review): the 'log' key is the older Lightning logging
         # convention -- confirm the installed version still honours it.
         tensorboard_logs = {'train_loss': loss}
         return {'loss': loss, 'log': tensorboard_logs}
	from transformers import BertModel
	import torch.nn.functional as F


	class BertMNLIFinetuner(pl.LightningModule):
	    """Pretrained BERT encoder with a linear 3-class classification head.

	    The head is applied to the hidden state at sequence position 0, and the
	    encoder's attention maps are returned alongside the logits.
	    """

	    def __init__(self):
	        """Load the pretrained encoder and build the classification head."""
	        super().__init__()

	        # use pretrained BERT; output_attentions=True asks the encoder to
	        # return its attention maps as well
	        self.bert = BertModel.from_pretrained('bert-base-cased', output_attentions=True)

	        # fine tuner (3 classes)
	        self.num_classes = 3
	        # The encoder has to be reached through `self`: a bare `bert` is not
	        # a name in this scope and raises NameError on construction.
	        self.W = nn.Linear(self.bert.config.hidden_size, self.num_classes)

	    def forward(self, input_ids, attention_mask, token_type_ids):
	        """Run the encoder and classify each sequence.

	        Args:
	            input_ids: forwarded to the encoder as ``input_ids``.
	            attention_mask: forwarded to the encoder as ``attention_mask``.
	            token_type_ids: forwarded to the encoder as ``token_type_ids``.

	        Returns:
	            A ``(logits, attn)`` pair: the head's output for position 0 of
	            each sequence, and the third element of the encoder output
	            (the attention maps, given ``output_attentions=True``).
	        """
	        outputs = self.bert(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	            token_type_ids=token_type_ids,
	        )

	        # Index by position instead of tuple-unpacking. Depending on the
	        # installed transformers version the encoder returns either a plain
	        # tuple or a ModelOutput; integer indexing works on both, whereas
	        # unpacking a ModelOutput yields its field names, not tensors.
	        h = outputs[0]
	        attn = outputs[2]

	        # position 0 of every sequence (presumably the [CLS] token)
	        h_cls = h[:, 0]
	        logits = self.W(h_cls)
	        return logits, attn

	    def training_step(self, batch, batch_nb):
	        """Compute the cross-entropy training loss for one batch.

	        Args:
	            batch: a 4-item sequence
	                ``(input_ids, attention_mask, token_type_ids, label)``.
	            batch_nb: presumably the batch index; not used here.

	        Returns:
	            A dict holding the loss under ``'loss'`` and the values to log
	            under ``'log'``.
	        """
	        # batch
	        input_ids, attention_mask, token_type_ids, label = batch

	        # fwd: call the module itself (not .forward) so registered hooks
	        # run; the attention maps are not needed for the loss
	        y_hat, _ = self(input_ids, attention_mask, token_type_ids)

	        # loss
	        loss = F.cross_entropy(y_hat, label)

	        # logs
	        # NOTE(review): the 'log' key is the older Lightning logging
	        # convention -- confirm the installed version still honours it.
	        tensorboard_logs = {'train_loss': loss}
	        return {'loss': loss, 'log': tensorboard_logs}