Last active
July 3, 2018 01:11
-
-
Save alialavia/1c3ad92d72789d4a010f3802a9ad224c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
To evaluate the good or bad score of a tweet, we first tokenize the tweet and then
stem each word in it. Each stem is associated with a positive or negative weight
using a dictionary.
Finally, we calculate the average word weight of the tweet and decide whether it is
a good or bad one based on that.
""" | |
import json | |
from nltk import word_tokenize | |
from nltk.stem.porter import * | |
# Single Porter stemmer instance, created once at import time and reused for
# every word that get_average_word_weight() looks up.
stemmer = PorterStemmer()
def get_words(str):
    """Break a string down into word tokens.

    The text is passed unchanged to NLTK's ``word_tokenize`` and whatever
    that call returns is handed back to the caller.

    NOTE: the parameter name shadows the builtin ``str`` inside this
    function; it is kept so existing keyword callers keep working.
    """
    tokens = word_tokenize(str)
    return tokens
# Stem -> weight lookup table. It starts out empty and is then replaced by the
# contents of word_weights.json (read relative to the current directory).
word_weights = dict()
with open("word_weights.json") as f:
    # Read the whole file and parse it as JSON; the `with` block closes the
    # file handle once parsing is done.
    word_weights = json.loads(f.read())
def get_average_word_weight(list_of_words):
    """Return the average weight of the words in ``list_of_words``.

    Each word is stemmed with the module-level ``stemmer`` and the stem is
    looked up in the module-level ``word_weights`` table. Stems that are not
    in the table add nothing to the sum but still count towards the number of
    words, so unknown words pull the average towards zero.

    As a side effect, every unknown stem is printed as a ``"stem": 0.0,`` line
    (presumably so it can be pasted into word_weights.json -- confirm).

    Returns 0.0 for an empty list rather than raising ZeroDivisionError.
    """
    number_of_words = len(list_of_words)
    if number_of_words == 0:
        # Nothing to average (e.g. an empty tweet); avoid dividing by zero.
        return 0.0

    sum_of_word_weights = 0.0
    for w in list_of_words:
        stemmed_word = stemmer.stem(w)
        if stemmed_word in word_weights:
            sum_of_word_weights += word_weights[stemmed_word]
        else:
            # Report the unknown stem in JSON "key": value form.
            print('"' + stemmed_word + '": 0.0,')
    return sum_of_word_weights / number_of_words
# Sample run: tokenize one tweet, average its word weights and print a verdict.
tweet_string = "Thanks to the historic TAX CUTS that I signed into law, your paychecks are going way UP, your taxes are going way DOWN, and America is once again OPEN FOR BUSINESS!"
words = get_words(tweet_string)
avg_tweet_weight = get_average_word_weight(words)
print("The weight of the tweet is " + str(avg_tweet_weight))
# A strictly positive average counts as a "good" tweet; zero or negative is "bad".
print(
    "What a presidential thing to say! HUGE!"
    if avg_tweet_weight > 0
    else "Surely you're joking, Mr. Trump! SAD!"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment