fclesio · July 3, 2019 10:43
diff --git a/lexical-diversity.py b/lexical-diversity.py
 def get_lexical_diversity(df, artist):
     """Print and return the lexical diversity of one artist's lyrics.

     Lexical diversity is the number of distinct words (compared
     case-insensitively) divided by the total number of words, rounded to
     two decimal places. Words are whitespace-separated tokens.

     Args:
         df: DataFrame with an 'artist' column and a 'lyric' column of
             strings. Rows whose lyric is missing (NaN/None) are skipped.
         artist: Value matched exactly against the 'artist' column.

     Returns:
         The rounded ratio as a float, or 0.0 when the artist has no
         words at all (no matching rows, or only empty/missing lyrics).
     """
     # Missing lyrics cannot be tokenized, so drop them up front.
     lyrics = df.loc[df['artist'] == artist, 'lyric'].dropna()

     # A single token list feeds both the numerator and the denominator.
     tokens = [word for lyric in lyrics for word in lyric.lower().split()]
     total_words = len(tokens)
     total_distinct_words = len(set(tokens))

     # Guard the division: an artist without any words has no diversity.
     if total_words:
         lexical_diversity = round(total_distinct_words / total_words, 2)
     else:
         lexical_diversity = 0.0

     print(f'Lexical Diversity for {artist}: {lexical_diversity}')
     return lexical_diversity
	def get_lexical_diversity(df, artist):
		"""Print and return the lexical diversity of one artist's lyrics.

		The score is the count of distinct words (case-insensitive) over the
		total word count, rounded to two decimals. Words are the
		whitespace-separated tokens of the 'lyric' column.

		Args:
			df: DataFrame with an 'artist' column and a 'lyric' column of
				strings. Rows whose lyric is missing (NaN/None) are skipped.
			artist: Value matched exactly against the 'artist' column.

		Returns:
			The rounded ratio as a float, or 0.0 when the artist has no
			words at all (no matching rows, or only empty/missing lyrics).
		"""
		# Missing lyrics cannot be tokenized, so drop them up front.
		lyrics = df.loc[df['artist'] == artist, 'lyric'].dropna()

		# All words, lowercased: the denominator counts them, the numerator
		# counts the unique ones.
		tokens = [word for lyric in lyrics for word in lyric.lower().split()]
		total_words = len(tokens)
		total_distinct_words = len(set(tokens))

		# Guard the division: an artist without any words has no diversity.
		if total_words:
			lexical_diversity = round(total_distinct_words / total_words, 2)
		else:
			lexical_diversity = 0.0

		print(f'Lexical Diversity for {artist}: {lexical_diversity}')
		return lexical_diversity