Skip to content

Instantly share code, notes, and snippets.

@khaledadrani
Created December 21, 2021 18:12
Show Gist options
  • Select an option

  • Save khaledadrani/7230d4e9b4c92bda5ae175a0bee2f14f to your computer and use it in GitHub Desktop.

Select an option

Save khaledadrani/7230d4e9b4c92bda5ae175a0bee2f14f to your computer and use it in GitHub Desktop.
import spacy
# Load the spaCy pipeline named 'en_core_web_md' once at module level; the
# resulting `nlp` object is reused by the calls further down in this file.
nlp = spacy.load('en_core_web_md')
def annotate_text(doc):
    """Collect the entities of a parsed document as plain dictionaries.

    Args:
        doc: A parsed document exposing an ``ents`` iterable whose items
            provide ``text``, ``label_``, ``start_char`` and ``end_char``
            attributes (for example, the object returned by calling the
            spaCy pipeline on a string).

    Returns:
        A list with one dict per entity, in ``doc.ents`` order. Each dict
        holds the keys ``text``, ``label``, ``start``, ``end`` and
        ``propertiesList``; an empty list is returned when there are no
        entities.
    """
    return [
        {
            'text': ent.text,
            'label': ent.label_,
            'start': ent.start_char,
            'end': ent.end_char,
            # Always starts empty; the literal builds a fresh list for every
            # entity so entries never share one mutable object.
            'propertiesList': [],
        }
        for ent in doc.ents
    ]
# Example: run the pipeline on the first item of `corpus` and list its entities.
# NOTE(review): `corpus` is not defined in the visible part of this file;
# presumably an indexable collection of strings defined elsewhere — confirm.
doc = nlp(corpus[0])
# Bare expression: the returned list is neither stored nor printed here
# (it would only be echoed when run in an interactive session).
annotate_text(doc)
def annotate_corpus(corpus,nlp):
    """Annotate every text in a corpus with its entities.

    Args:
        corpus: The texts to process; passed unchanged to ``nlp.pipe``.
        nlp: A pipeline object whose ``pipe`` method accepts ``corpus`` and
            yields one parsed document per text.

    Returns:
        A list with one dict per yielded document, in the order ``nlp.pipe``
        produces them. Each dict has the keys ``document`` (``str(doc)``)
        and ``annotation`` (the result of ``annotate_text(doc)``).
    """
    return [
        {
            'document': str(doc),
            'annotation': annotate_text(doc),
        }
        for doc in nlp.pipe(corpus)
    ]
# Example: annotate the whole corpus using the module-level pipeline.
# NOTE(review): `corpus` is not defined in the visible part of this file;
# presumably defined elsewhere — confirm.
data = annotate_corpus(corpus,nlp)
# Bare expression: looks up the first annotated entry without storing or
# printing it (it would only be echoed when run in an interactive session).
data[0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment