Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pull and install the Hugging Face Transformers repo | |
git clone https://github.com/huggingface/transformers && cd transformers | |
pip install . | |
pip install nltk py-rouge | |
cd examples/summarization | |
#------------------------------ | |
# Download the original summarization datasets. The commands below download them from Google Drive on Linux. | |
wget --save-cookies cookies.txt --keep-session-cookies --no-check-certificate 'https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfTHk4NFg2SndKcjQ' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/Code: \1\n/p' | |
wget --load-cookies cookies.txt --no-check-certificate 'https://drive.google.com/uc?export=download&confirm=<CONFIRMATION CODE HERE>&id=0BwmD_VLjROrfTHk4NFg2SndKcjQ' -O cnn_stories.tgz |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import XLNetTokenizer, XLNetForQuestionAnsweringSimple | |
import torch | |
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased') | |
model = XLNetForQuestionAnsweringSimple.from_pretrained('xlnet-base-cased') | |
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 | |
print(f'Encoded sequence ids -- {input_ids.tolist()[0]}') | |
# Encoded sequence ids -- [17, 11368, 19, 94, 2288, 27, 10920, 4, 3] | |
start_positions = torch.tensor([1]) | |
end_positions = torch.tensor([3]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import XLNetTokenizer, XLNetForSequenceClassification | |
import torch | |
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') | |
model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased') | |
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 | |
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 | |
outputs = model(input_ids, labels=labels) | |
loss, logits = outputs[:2] | |
print(f'Current Loss at -- {loss.tolist()}') | |
# Current Loss at -- 1.1906177997589111 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import XLNetTokenizer, XLNetLMHeadModel | |
import torch | |
import torch.nn.functional as F | |
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') | |
model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased') | |
# We show how to set up inputs to predict a next token using a bi-directional context. | |
encoded_text = tokenizer.encode("Quick brown fox jumped over the lazy <mask>.", add_special_tokens=True) | |
input_ids = torch.tensor(encoded_text).unsqueeze(0) # We will predict the masked token | |
print(f'Input squence -- {encoded_text}') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Asynchronously processes text in a document stored in an S3 bucket. For setup information, see https://docs.aws.amazon.com/textract/latest/dg/async.html | |
import boto3 | |
import json | |
import sys | |
import time | |
class ProcessType: | |
DETECTION = 1 | |
ANALYSIS = 2 |