Skip to content

Instantly share code, notes, and snippets.

@wesslen
Created September 4, 2019 14:53
Show Gist options
  • Select an option

  • Save wesslen/311a1606d649f73c7487637d5d853b2f to your computer and use it in GitHub Desktop.

Select an option

Save wesslen/311a1606d649f73c7487637d5d853b2f to your computer and use it in GitHub Desktop.
handling spaCy entities
# spaCy supplies the NLP pipeline used below; displacy is imported but not
# used in the lines visible here.
import spacy
from spacy import displacy
# Name (or path) of the spaCy model handed to spacy.load().
path = "en_core_web_sm"
nlp = spacy.load(path)
# Folder used for both the input CSV and the output CSV written later.
# The file names are appended with plain string concatenation, so the
# trailing slash is required.
# NOTE(review): this looks like a placeholder value -- set it before running.
path_folder = "/path/to/file/"
import pandas as pd
# Input table; later code reads its 'text' and 'id' columns.
df = pd.read_csv(path_folder + "file.csv")
# Build the (text, context) pairs consumed by nlp.pipe(..., as_tuples=True).
# Each item must be ONE tuple: list.append() accepts a single argument, so
# the text and its context dict are packed together. The context carries the
# row's 'id' so every extracted entity can be traced back to its source row.
docs = [
    (text, dict(id=row_id))
    for text, row_id in zip(df['text'], df['id'])
]
# %%time  -- Jupyter cell magic: only valid as the first line of a notebook
# cell, so it is kept here as a comment to leave the file importable as
# plain Python. Re-enable it in a notebook to time this step.

# Run the spaCy pipeline over every (text, context) pair and collect, per
# document, one tuple per named entity:
#     (entity text, entity label, start char offset, end char offset, row id)
# `r` ends up as a list of lists: one inner list per input document (empty
# when the document has no entities).
r = []
for doc, context in nlp.pipe(docs, as_tuples=True):
    add = [
        (ent.text, ent.label_, ent.start_char, ent.end_char, context['id'])
        for ent in doc.ents
    ]
    r.append(add)
# Flatten the per-document entity lists in `r` into one table and write it
# to <path_folder>/entities.csv.
# A single DataFrame is built from all rows at once rather than one frame
# per document followed by pd.concat: pd.concat raises ValueError on an
# empty list (no input documents), whereas this form yields an empty table
# that still has the expected columns.
entity_rows = [entity for doc_entities in r for entity in doc_entities]
dfr = pd.DataFrame(
    entity_rows,
    columns=['text', 'label', 'start_char', 'end_char', 'id'],
)
# The default 0..n-1 index is written as the first CSV column, matching the
# layout produced by concat(..., ignore_index=True).
dfr.to_csv(path_folder + "entities.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment