Severin Perez sevperez

Writer | Data Scientist

sevperez / writing_1.py

Created October 8, 2020 07:00

sevperez / exploring_stanza_13.py

Created August 24, 2020 21:51

	# What is the sentiment surrounding each character?
	def sentiment_descriptor_to_val(descriptor):
	"""
	- Parameters: descriptor ("negative", "neutral", or "positive")
	- Returns: -1 for "negative", 0 for "neutral", 1 for "positive"
	"""
	if descriptor == "negative":
	return -1
	elif descriptor == "neutral":
	return 0

sevperez / exploring_stanza_12.py

Created August 24, 2020 21:49

	# Which characters appear most frequently?
	def frequency_count(df, col, limit=10):
	    """
	    - Parameters: df (a Pandas DataFrame), col (the name of a column in df),
	      limit (the maximum number of rows to return; defaults to 10)
	    - Returns: A Pandas Series mapping the most frequent values in df[col]
	      to their counts, most frequent first, cut off after `limit` rows.
	    """
	    return df[col].value_counts().head(limit)

	# Example: count the most frequent values in the "text" column of
	# `characters` (defined outside this snippet), using the default limit.
	frequency_count(characters, "text")

	# Sample output (the preview appears to show only the first rows):
	# Ahab 474
	# Stubb 224
	# Queequeg 184
	# Starbuck 140

sevperez / exploring_stanza_11.py

Created August 24, 2020 21:48

	def num_unique_items(df, col):
	    """
	    - Parameters: df (a Pandas DataFrame), col (the name of a column in df)
	    - Returns: The number of distinct values in df[col]. Missing values
	      are counted as a value of their own rather than being dropped.
	    """
	    # dropna=False keeps the count equal to len(df[col].unique()).
	    return df[col].nunique(dropna=False)

	# Example: number of distinct values in the "text" column of `characters`
	# (defined outside this snippet).
	num_unique_items(characters, "text")

	# Sample output:
	# 699

sevperez / exploring_stanza_10.py

Last active August 24, 2020 21:48

	# select person entities
	def select_person_entities(doc):
	    """
	    - Parameters: doc (an object exposing an `entities` iterable whose
	      items have a `type` attribute, e.g. a Stanza Document object)
	    - Returns: A list of the entities in doc whose type is "PERSON",
	      in the order they appear in doc.entities.
	    """
	    return [ent for ent in doc.entities if ent.type == "PERSON"]

	def person_df(doc):
	"""
	- Parameters: doc (a Stanza Document object)
	- Returns: A Pandas DataFrame with one row for each entity in doc
	that has a "PERSON" type, and columns text, type, start_char,
	and the sentiment of the sentence in which the entity appears.

sevperez / exploring_stanza_9.py

Created August 24, 2020 21:45

	# load the full text and put it through the pipeline
	def load_text_doc(file_path, encoding="utf-8"):
	    """
	    - Parameters: file_path (path of the text file to read), encoding
	      (the text encoding used to decode the file; defaults to "utf-8")
	    - Returns: The full contents of the file as a single string.
	    """
	    # An explicit encoding keeps the result independent of the
	    # platform's locale default.
	    with open(file_path, encoding=encoding) as f:
	        return f.read()

	# Relative path, so it is resolved against the current working directory.
	moby_path = "moby_dick.txt"
	# Read the whole file into one string.
	moby_dick_text = load_text_doc(moby_path)
	# `nlp` is not defined in this snippet; presumably the Stanza pipeline
	# built in an earlier gist (TODO confirm).
	moby_dick = nlp(moby_dick_text)

sevperez / exploring_stanza_8.py

Last active August 24, 2020 21:45

	def sentiment_descriptor(sentence):
	"""
	- Parameters: sentence (a Stanza Sentence object)
	- Returns: A string descriptor for the sentiment value of sentence.
	"""
	sentiment_value = sentence.sentiment
	if (sentiment_value == 0):
	return "negative"
	elif (sentiment_value == 1):
	return "neutral"

sevperez / exploring_stanza_7.py

Created August 24, 2020 21:42

	def print_entity_info(entity):
	    """
	    - Parameters: entity (an object with text, type, start_char and
	      end_char attributes)
	    - Returns: None. Prints one tab-separated "Label:<TAB>value" line
	      for each of the four attributes.
	    """
	    print(f"Text:\t{entity.text}")
	    print(f"Type:\t{entity.type}")
	    print(f"Start:\t{entity.start_char}")
	    print(f"End:\t{entity.end_char}")

	# Example: describe the first entity of `moby_p1` (defined outside this
	# snippet).
	print_entity_info(moby_p1.entities[0])

	# Sample output (the preview appears to omit the Start and End lines):
	# Text: Ishmael
	# Type: PERSON

sevperez / exploring_stanza_6.py

Created August 24, 2020 21:41

	def print_word_info(word):
	    """
	    - Parameters: word (an object with text, lemma, upos and xpos
	      attributes)
	    - Returns: None. Prints one labelled line for each of the four
	      attributes.
	    """
	    print(f"Text:\t{word.text}")
	    print(f"Lemma: \t{word.lemma}")
	    print(f"UPOS: \t{word.upos}")
	    print(f"XPOS: \t{word.xpos}")

	# Example: describe the fifth word (index 4) of the fourth sentence
	# (index 3) of `moby_p1` (defined outside this snippet).
	print_word_info(moby_p1.sentences[3].words[4])

	# Sample output (the preview appears to omit the UPOS and XPOS lines):
	# Text: growing
	# Lemma: grow

sevperez / exploring_stanza_5.py

Created August 24, 2020 21:41

	def print_token_info(token):
	    """
	    - Parameters: token (an object with text, start_char and end_char
	      attributes)
	    - Returns: None. Prints one tab-separated "Label:<TAB>value" line
	      for each of the three attributes.
	    """
	    print(f"Text:\t{token.text}")
	    print(f"Start:\t{token.start_char}")
	    print(f"End:\t{token.end_char}")

	# Example: describe the third token (index 2) of the first sentence of
	# `moby_p1` (defined outside this snippet).
	print_token_info(moby_p1.sentences[0].tokens[2])

	# Sample output:
	# Text: Ishmael
	# Start: 8
	# End: 15