Last active
November 17, 2023 18:07
-
-
Save AO8/78ef34cf37b103f2c41d8d34474dbcb8 to your computer and use it in GitHub Desktop.
Python text analyzer using TextBlob and Textatistic.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
from textblob import TextBlob | |
from textblob.sentiments import NaiveBayesAnalyzer | |
from textatistic import Textatistic | |
# The Naive Bayes analyzer requires NLTK's "movie_reviews" corpus, so it is
# fetched here before any analysis runs.
nltk.download("movie_reviews")
def main():
    """Read text from the user, analyze it, and print the full report.

    The report has three sections -- sentiment, basic stats, and
    readability -- each preceded by a blank line.
    """
    text = get_user_text()
    sentiment = convert_to_blob(text)
    stats = convert_to_textatistic_dict(text)

    # Each section is rendered by its own display function; the two
    # Textatistic-based sections share the same dictionary.
    sections = (
        (display_sentiment, sentiment),
        (display_basic_stats, stats),
        (display_readability, stats),
    )
    for render, payload in sections:
        print()
        render(payload)
def get_user_text():
    """Prompt for the text to analyze and return it.

    The first line is read with ``input()``. When standard input is not an
    interactive terminal (for example, text piped in or redirected from a
    file), the remaining lines are read as well, so multi-line text is no
    longer truncated to its first line. Interactive sessions behave as
    before: a single line terminated by Enter.

    Returns:
        The entered text without its trailing newline(s).
    """
    import sys

    first_line = input("Copy and paste the text you would like to analyze below:\n")

    # In an interactive terminal there is no portable way to know whether
    # more pasted lines are pending, so keep the original one-line behaviour.
    if sys.stdin.isatty():
        return first_line

    # Non-interactive input: consume everything that follows the first line.
    remainder = sys.stdin.read().rstrip("\n")
    if not remainder:
        return first_line
    return f"{first_line}\n{remainder}"
def convert_to_blob(text):
    """Return the Naive Bayes sentiment for *text*.

    Despite the name, the value returned is the ``sentiment`` attribute of a
    TextBlob built with a ``NaiveBayesAnalyzer``, not the TextBlob itself.
    ``display_sentiment`` reads ``p_pos``, ``p_neg`` and ``classification``
    from this value.
    """
    analyzer = NaiveBayesAnalyzer()
    return TextBlob(text, analyzer=analyzer).sentiment
def convert_to_textatistic_dict(text):
    """Return the dictionary produced by ``Textatistic(text).dict()``.

    The display functions in this file look up keys such as
    ``'char_count'`` and ``'smog_score'`` in the result.
    """
    analysis = Textatistic(text)
    return analysis.dict()
def display_sentiment(blob):
    """Print the sentiment summary for a Naive Bayes Analyzer result.

    *blob* must expose ``p_pos`` and ``p_neg`` (printed as percentages
    rounded to two decimals) and ``classification``, where 'pos' is
    positive and 'neg' is negative.
    """
    heading = "Sentiment Summary:\n".upper()
    print(heading)

    positive_pct = round(blob.p_pos * 100, 2)
    print(f"Positive percentage: {positive_pct}%")

    negative_pct = round(blob.p_neg * 100, 2)
    print(f"Negative percentage: {negative_pct}%")

    verdict = blob.classification
    print(f"Overall sentiment classification: {verdict}")
def display_basic_stats(texta_dict):
    """Print basic counts taken from a Textatistic dictionary.

    Displays the character count, word count, sentence count, and number
    of polysyllable words found under the corresponding keys.
    """
    heading = "Basic Stats:\n".upper()
    print(heading)

    characters = texta_dict['char_count']
    print(f"Character count: {characters}")

    words = texta_dict['word_count']
    print(f"Word count: {words}")

    sentences = texta_dict['sent_count']
    print(f"Sentence count: {sentences}")

    polysyllables = texta_dict['polysyblword_count']
    print(f"Polysyllable words used: {polysyllables}")
def display_readability(texta_dict):
    """Print readability scores from a Textatistic dictionary, rounded to one decimal.

    Flesch-Kincaid score corresponds to a specific grade level.
    Gunning Fog score corresponds to a specific grade level.
    SMOG score corresponds to the years of education required to understand a text.
    Dale-Chall score maps to grade levels from 4 and below to college grads
    (grade 16) and above.
    """
    heading = "Readability Summary:\n".upper()
    print(heading)

    flesch_kincaid = round(texta_dict['fleschkincaid_score'], 1)
    print(f"Flesch Kincaid score: {flesch_kincaid}")

    gunning_fog = round(texta_dict['gunningfog_score'], 1)
    print(f"Gunning Fog score: {gunning_fog}")

    smog = round(texta_dict['smog_score'], 1)
    print(f"Simple Measure of Gobbledygook score: {smog}")

    dale_chall = round(texta_dict['dalechall_score'], 1)
    print(f"Dale-Chall score: {dale_chall}")
# Run the analyzer only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment