Last active
September 21, 2024 10:35
-
-
Save b2m/6e2697ce182548a98320e4b7b7b885b6 to your computer and use it in GitHub Desktop.
Documented FastAPI wrapper around the NER component of the de_core_news_sm model from spaCy.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List | |
import spacy | |
import uvicorn | |
from fastapi import FastAPI | |
from pydantic import BaseModel, Field | |
# FastAPI application object for the NER service.
# docs_url="/" is the documentation URL handed to FastAPI; the description
# below uses Markdown link/list syntax and is passed through unchanged.
app = FastAPI(
    docs_url="/",
    title="NER service based on spaCy",
    description="""
Provides the NER component from [spaCy](https://spacy.io/) as web service.
- spaCy: 3.1.1
- Model: [de_core_news_sm](https://spacy.io/models/de#de_core_news_sm)
""",
)
# Load the trained German pipeline once at import time, with every component
# listed below disabled so that only the NER component is left to run.
nlp = spacy.load(
    "de_core_news_sm",
    disable=[
        "tok2vec", "tagger", "morphologizer",
        "parser", "attribute_ruler", "lemmatizer",
    ],
)
class NER_Request(BaseModel):
    """
    Request with text to perform NER.
    """

    # The class docstring above is kept verbatim on purpose.
    # `text` is required (`...` means "no default"); the remaining keywords
    # are metadata handed to pydantic's Field.
    text: str = Field(
        ...,
        example="Martin Luther war in Wittenberg.",
        description="Text to extract entities from.",
        title="Text",
    )
class Entity(BaseModel):
    """
    Named Entity found in the text.
    """

    # The class docstring above is kept verbatim on purpose.
    # All four fields are required (`...` means "no default").

    # Lower bound of the entity; validated to be >= 0.
    start: int = Field(
        ...,
        ge=0,
        example=0,
        description="Start position of entity in the text.",
        title="Start",
    )

    # Upper bound of the entity; validated to be >= 1.
    end: int = Field(
        ...,
        ge=1,
        example=2,
        description="End position of entity in the text.",
        title="End",
    )

    # Surface text of the entity; must not be empty.
    text: str = Field(
        ...,
        min_length=1,
        example="Martin Luther",
        description="The text of the Named Entity.",
        title="Text",
    )

    # Entity type label, e.g. "PER" as in the example.
    label: str = Field(
        ...,
        example="PER",
        description="The label (type) for the Named Entity.",
        title="Label",
    )
@app.post(
    "/ner",
    summary="Perform NER on text.",
    response_description="List of found entities.",
    response_model=List[Entity],
)
def ner(ner_request: NER_Request):
    """
    Performs a Named Entity Recognition on the given `text`.
    Will return the found entities in a list.
    """
    # The docstring above is kept verbatim on purpose.
    # NOTE(review): spaCy documents Span.start / Span.end as token indices
    # (end exclusive), not character offsets -- confirm this is what clients
    # of the "start"/"end" fields expect.
    found = []
    for span in nlp(ner_request.text).ents:
        entity = Entity(
            start=span.start,
            end=span.end,
            text=span.text,
            label=span.label_,
        )
        found.append(entity)
    return found
# Script entry point: serve the app with uvicorn on the loopback interface.
# The app is passed as an import string ("<module>:<attribute>").
if __name__ == "__main__":
    uvicorn.run(
        "ner-service:app",
        port=5000,
        host="127.0.0.1",
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fastapi==0.67.0 | |
pydantic==1.8.2 | |
python-multipart==0.0.5 | |
spacy==3.1.1 | |
uvicorn==0.14.0 |
This is a simple version without comments using form-urlencoded values.
import spacy
import uvicorn
from fastapi import FastAPI, Form
# Load the German spaCy pipeline once at import time (all components enabled).
nlp = spacy.load("de_core_news_sm")
# FastAPI application with default settings.
app = FastAPI()
@app.post("/ner")
def ner(text: str = Form(...)):
    # `text` is a required form field (Form(...) has no default).
    # Returns one {"text", "label"} dict per entity found by the pipeline.
    # Comments are used instead of a docstring so the generated API
    # description stays unchanged.
    entities = []
    for ent in nlp(text).ents:
        entities.append({"text": ent.text, "label": ent.label_})
    return entities
# Script entry point: serve the app with uvicorn on the loopback interface.
if __name__ == "__main__":
    uvicorn.run(
        "simple-ner-service:app",
        port=5000,
        host="127.0.0.1",
    )
Jython expression in OpenRefine:
import json, urllib, urllib2
url = 'http://localhost:5000/ner'
request_data = urllib.urlencode({"text": value.encode('utf-8')})
response = urllib2.urlopen(url, request_data)
return json.dumps(json.load(response), ensure_ascii=False)
Nice integration of many technologies ++
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is a documented FastAPI wrapper around the NER component of the de_core_news_sm model from spaCy.
I wrote this to be able to locally perform Named Entity Recognition on texts in OpenRefine for experiments.
Use the following Jython expression in OpenRefine: