Created
March 29, 2019 17:21
-
-
Save hpwxf/61ebb78139c222a16f423aed2aaf8dce to your computer and use it in GitHub Desktop.
A small test of spaCy's natural language processing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Small spaCy demo script.

Parses one English sentence, writes a dependency rendering to
``sentence.svg`` and a named-entity rendering to ``sentence.html`` (both in
the current working directory), then prints each token's part-of-speech tag
and the recognised entities.
"""

from pathlib import Path

import spacy
from spacy import displacy

# spaCy API Documentation : https://spacy.io/api
# Load the English pipeline named "en_core_web_sm"; the model package has to
# be installed separately from spaCy itself.
nlp = spacy.load("en_core_web_sm")
doc = nlp(u"Next friday at 1 PM, I will have lunch in Paris with my friend Chris, the one who has a red hair cat.")

# Layout settings for the dependency visualizer. The same dict is also handed
# to the entity rendering below.
# NOTE(review): these keys look dependency-specific and are presumably ignored
# by the "ent" style - confirm against the displaCy docs.
options = {"collapse_punct": True, "word_spacing": 10, "distance": 100}

# SVG output for token relations
svg = displacy.render(doc, style="dep", options=options)
output_path = Path("sentence.svg")
# write_text opens, writes and closes the file, so no handle is left open.
output_path.write_text(svg, encoding="utf-8")

# HTML output for token entities (cf page option)
html = displacy.render(doc, style="ent", options=options, page=True)
output_path = Path("sentence.html")
output_path.write_text(html, encoding="utf-8")

print("Tokens:")
for token in doc:
    # One line per token: tab, part-of-speech tag, then the token text.
    print("\t", token.pos_, token.text)

print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment