This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
s = [1, 2, 3, 4, 5]


def foo(l, bar):
    """Apply the callable ``bar`` to each item of ``l`` and return a list.

    - Parameters: l (an iterable), bar (a one-argument callable)
    - Returns: a new list holding ``bar(item)`` for every item, in order
    """
    return list(map(bar, l))


print(foo(s, lambda x: x * 2))  # [2, 4, 6, 8, 10]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# What is the sentiment surrounding each character?
def sentiment_descriptor_to_val(descriptor):
    """
    Convert a sentiment descriptor string into a numeric score.

    - Parameters: descriptor ("negative", "neutral", or "positive")
    - Returns: -1 for "negative", 0 for "neutral", 1 for "positive"
    - Raises: ValueError for any other descriptor

    NOTE(review): the published listing is cut off after the "neutral"
    branch. The "positive" branch follows the documented return contract;
    rejecting other values is an assumption -- confirm against the
    original source.
    """
    if descriptor == "negative":
        return -1
    elif descriptor == "neutral":
        return 0
    elif descriptor == "positive":
        return 1
    raise ValueError(f"unknown sentiment descriptor: {descriptor!r}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Which characters appear most frequently?
def frequency_count(df, col, limit=10):
    """
    Count how often each value occurs in one column.

    - Parameters: df (a Pandas DataFrame), col (column label),
      limit (maximum number of rows to return, default 10)
    - Returns: the first ``limit`` rows of ``df[col].value_counts()``,
      i.e. a Series indexed by value holding the occurrence counts
    """
    return df[col].value_counts().head(limit)
# Example call; the comments below show the start of the resulting output.
frequency_count(characters, "text")
# Ahab 474
# Stubb 224
# Queequeg 184
# Starbuck 140
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def num_unique_items(df, col):
    """
    Count the distinct values in one column.

    - Parameters: df (a Pandas DataFrame), col (column label)
    - Returns: the length of ``df[col].unique()`` as an int
    """
    return len(df[col].unique())
# Example call; the comment below shows the resulting value.
num_unique_items(characters, "text")
# 699
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# select person entities
def select_person_entities(doc):
    """
    Filter a document's entities down to people.

    - Parameters: doc (an object exposing an ``entities`` iterable whose
      items have a ``type`` attribute)
    - Returns: a list of the entities whose ``type`` equals "PERSON",
      in their original order
    """
    return [ent for ent in doc.entities if ent.type == "PERSON"]
def person_df(doc): | |
""" | |
- Parameters: doc (a Stanza Document object) | |
- Returns: A Pandas DataFrame with one row for each entity in doc | |
that has a "PERSON" type, and columns text, type, start_char,
and the sentiment of the sentence in which the entity appears. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load the full text and put it through the pipeline
def load_text_doc(file_path, encoding="utf-8"):
    """
    Read a whole text file into a string.

    - Parameters: file_path (path of the file to read),
      encoding (text encoding used to decode the file, default "utf-8")
    - Returns: the full contents of the file as a str
    """
    # An explicit encoding keeps decoding independent of the platform's
    # locale default.
    with open(file_path, encoding=encoding) as f:
        return f.read()
# Read the text from disk, then run it through the `nlp` callable.
moby_path = "moby_dick.txt"
moby_dick_text = load_text_doc(moby_path)
moby_dick = nlp(moby_dick_text)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sentiment_descriptor(sentence):
    """
    Translate a sentence's numeric sentiment into a word.

    - Parameters: sentence (a Stanza Sentence object)
    - Returns: A string descriptor for the sentiment value of sentence:
      "negative" for 0, "neutral" for 1, "positive" for 2.
    - Raises: ValueError if the sentiment value is none of 0, 1, 2

    NOTE(review): the published listing is cut off after the "neutral"
    branch. Mapping 2 to "positive" follows Stanza's sentiment scale;
    rejecting other values is an assumption -- confirm against the
    original source.
    """
    sentiment_value = sentence.sentiment
    if sentiment_value == 0:
        return "negative"
    elif sentiment_value == 1:
        return "neutral"
    elif sentiment_value == 2:
        return "positive"
    raise ValueError(f"unexpected sentiment value: {sentiment_value!r}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def print_entity_info(entity):
    """
    Print an entity's text, type and character span, one field per line.

    - Parameters: entity (an object with ``text``, ``type``,
      ``start_char`` and ``end_char`` attributes)
    - Returns: None; the four labelled lines are written to stdout
    """
    print(f"Text:\t{entity.text}")
    print(f"Type:\t{entity.type}")
    print(f"Start:\t{entity.start_char}")
    print(f"End:\t{entity.end_char}")
# Example call; the comments below show the start of the printed output.
print_entity_info(moby_p1.entities[0])
# Text: Ishmael
# Type: PERSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def print_word_info(word):
    """
    Print a word's text, lemma and part-of-speech tags, one field per line.

    - Parameters: word (an object with ``text``, ``lemma``, ``upos``
      and ``xpos`` attributes)
    - Returns: None; the four labelled lines are written to stdout
    """
    print(f"Text:\t{word.text}")
    print(f"Lemma: \t{word.lemma}")
    print(f"UPOS: \t{word.upos}")
    print(f"XPOS: \t{word.xpos}")
# Example call; the comments below show the start of the printed output.
print_word_info(moby_p1.sentences[3].words[4])
# Text: growing
# Lemma: grow
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def print_token_info(token):
    """
    Print a token's text and character span, one field per line.

    - Parameters: token (an object with ``text``, ``start_char`` and
      ``end_char`` attributes)
    - Returns: None; the three labelled lines are written to stdout
    """
    print(f"Text:\t{token.text}")
    print(f"Start:\t{token.start_char}")
    print(f"End:\t{token.end_char}")
# Example call; the comments below show the printed output.
print_token_info(moby_p1.sentences[0].tokens[2])
# Text: Ishmael
# Start: 8
# End: 15