Created
June 30, 2020 08:59
-
-
Save Hironsan/534b99ddafa40fb1c677b7cb6ce4e89d to your computer and use it in GitHub Desktop.
spaCy for Lambda
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import urllib.request | |
import spacy | |
import tarfile | |
from pathlib import Path | |
def maybe_download(model, dest):
    """Return the path of a spaCy model under *dest*, downloading it if absent.

    When the model directory is not present yet, the release archive
    ``{model}.tar.gz`` is fetched from the ``explosion/spacy-models`` GitHub
    releases, unpacked into *dest*, and the archive is then deleted.

    Args:
        model: Versioned model name such as ``'en_core_web_sm-2.3.0'``. The
            text before the first ``-`` is used as the package directory name.
        dest: Directory (``str`` or path-like) that holds extracted models.
            It is created if it does not exist.

    Returns:
        A ``pathlib.Path`` of the form ``dest/model/<package>/model``. This
        assumes the release archive unpacks to that layout.

    Raises:
        urllib.error.URLError: If the archive cannot be downloaded.
        tarfile.TarError: If the downloaded archive cannot be read/extracted.
    """
    dest_dir = Path(dest)
    package = model.split('-')[0]
    model_dir = dest_dir / model / package / model

    # Test for the directory that is actually returned, not just the archive
    # root: an interrupted extraction can leave ``dest/model`` behind without
    # the model inside it, which would otherwise never be repaired.
    if model_dir.exists():
        return model_dir

    print('Downloading...')
    dest_dir.mkdir(parents=True, exist_ok=True)
    url = f'https://github.com/explosion/spacy-models/releases/download/{model}/{model}.tar.gz'
    archive = dest_dir / f'{model}.tar.gz'
    try:
        urllib.request.urlretrieve(url, archive)
        with tarfile.open(archive) as tar:
            # Use the safe "data" extraction filter where the running Python
            # provides it; older interpreters fall back to the plain call.
            if hasattr(tarfile, 'data_filter'):
                tar.extractall(path=dest_dir, filter='data')
            else:
                tar.extractall(path=dest_dir)
    finally:
        # The archive is only an intermediate artifact; drop it (including a
        # partial file from a failed download) so it does not pile up in dest.
        if archive.exists():
            archive.unlink()
    return model_dir
def lambda_handler(event, context):
    """Run the module-level ``nlp`` pipeline on ``event['text']``.

    Args:
        event: Mapping that must contain a ``'text'`` entry holding the text
            to analyse.
        context: Not used by this handler.

    Returns:
        A list with one dict per entry of ``doc.ents``, each carrying the
        keys ``'text'``, ``'label'``, ``'start'`` and ``'end'`` (taken from
        the entity's ``text``, ``label_``, ``start_char`` and ``end_char``).
    """
    parsed = nlp(event['text'])
    entities = []
    for span in parsed.ents:
        entry = {
            'text': span.text,
            'label': span.label_,
            'start': span.start_char,
            'end': span.end_char,
        }
        entities.append(entry)
    return entities
# Directory where extracted models are kept between invocations.
mnt_path = '/mnt/models'
# Versioned model release to fetch and load.
model = 'en_core_web_sm-2.3.0'

# Done at import time so the pipeline is built once per module load and then
# reused by every call to lambda_handler.
model_path = maybe_download(model=model, dest=mnt_path)
nlp = spacy.load(model_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment