Last active
August 24, 2020 21:48
-
-
Save sevperez/18209b041c30bbb76e9397381e7764bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# select person entities
def select_person_entities(doc):
    """Return the entities of ``doc`` whose ``type`` is "PERSON".

    The result is a new list, in the order the entities appear in
    ``doc.entities``.
    """
    people = []
    for entity in doc.entities:
        if entity.type == "PERSON":
            people.append(entity)
    return people
def person_df(doc):
    """
    Build a table of the "PERSON" entities found in a document.

    - Parameters: doc (a Stanza Document object)
    - Returns: A Pandas DataFrame with one row for each entity in doc
      that has a "PERSON" type, and columns text, type, start_char,
      end_char, and sentence_sentiment (the sentiment of the sentence in
      which the entity appears). The columns are present even when doc
      contains no "PERSON" entities.
    """
    columns = ["text", "type", "start_char", "end_char", "sentence_sentiment"]
    rows = [
        {
            "text": person.text,
            "type": person.type,
            "start_char": person.start_char,
            "end_char": person.end_char,
            # NOTE(review): `_sent` is a private attribute of the entity;
            # switch to a public accessor if the installed Stanza version
            # provides one.
            "sentence_sentiment": sentiment_descriptor(person._sent),
        }
        for person in select_person_entities(doc)
    ]
    # Naming the columns explicitly keeps the schema stable: without it an
    # empty `rows` list would produce a DataFrame with no columns at all.
    return pd.DataFrame(rows, columns=columns)
# Build the PERSON-entity table for `moby_dick` (defined elsewhere;
# presumably the processed Stanza Document) and preview its first five rows.
characters = person_df(moby_dick)
display(characters.head(5))
# Sample output:
#            text    type  start_char  end_char sentence_sentiment
# 0       Ishmael  PERSON          29        36            neutral
# 1          Cato  PERSON         890       894            neutral
# 2  Tiger-lilies  PERSON        4226      4238            neutral
# 3          Jove  PERSON        4988      4992            neutral
# 4     Narcissus  PERSON        5080      5089           negative
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment