Use AllenNLP for NER programmatically, and test the runtime.
from allennlp.predictors.predictor import Predictor
import time

model = "https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.04.26.tar.gz"

print("Loading model...")
predictor = Predictor.from_path(model)
print("Done loading model.")

# this is so we can use pre-tokenized text. Just remove the next 3 lines
# if you want the text tokenized first.
from allennlp.data.tokenizers.word_splitter import JustSpacesWordSplitter
spacestok = JustSpacesWordSplitter()
predictor._tokenizer = spacestok

# pre-tokenized text.
sent = "Stephen Mayhew is a person who lives in the North Pole ."

# tag 100 times individually.
start = time.time()
for i in range(100):
    res = predictor.predict_json({"sentence": sent})
end = time.time()
print(end - start)

# create a batch of 100 elements and tag.
start = time.time()
sents = []
for i in range(100):
    sents.append({"sentence": sent})
res = predictor.predict_batch_json(sents)
end = time.time()
print(end - start)

# Output (with GPU):
# Loading model...
# Done loading model.
# 19.09427809715271
# 2.3905272483825684
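
On a GPU, the single batched call is roughly 8x faster than 100 individual predict_json calls (about 2.4 s vs. 19.1 s). To actually use the predictions, you can read the tags out of the result dictionary. The sketch below assumes the 2018-era sentence-tagger/CRF-tagger predictor output, which includes "words" and "tags" keys; check the keys returned by your AllenNLP version.

# Minimal sketch: print each token with its predicted BIO tag.
# Assumes the result dict contains "words" and "tags" (true for the
# 2018-era NER predictor); other versions may differ.
single = predictor.predict_json({"sentence": sent})
for word, tag in zip(single["words"], single["tags"]):
    print(word, tag)

# For predict_batch_json, the result is a list of such dictionaries,
# one per input sentence, in the same order as the inputs.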