Skip to content

Instantly share code, notes, and snippets.

@hackintoshrao
Last active February 13, 2017 12:33
Show Gist options
  • Select an option

  • Save hackintoshrao/81b752b33e4d05ac17f20703ebbbaf6e to your computer and use it in GitHub Desktop.

Select an option

Save hackintoshrao/81b752b33e4d05ac17f20703ebbbaf6e to your computer and use it in GitHub Desktop.
## Read the reviews first.
# reviews.txt holds one review per line; labels.txt holds the label for the
# review on the same line number.
#
# `with` closes each handle even if reading raises, and rstrip('\n') removes
# only the line terminator -- a plain [:-1] slice would also cut the last
# real character off a final line that has no trailing newline.
with open('reviews.txt', 'r') as g:  # What we know!
    reviews = [line.rstrip('\n') for line in g]

with open('labels.txt', 'r') as g:  # What we WANT to know!
    # Upper-cased so labels can be compared against the 'POSITIVE' literal.
    labels = [line.rstrip('\n').upper() for line in g]
from collections import Counter
import numpy as np
# Word-frequency tallies: one per label, plus one over every review.
positive_counts = Counter()
negative_counts = Counter()
total_counts = Counter()

for idx, review in enumerate(reviews):
    # Any label other than 'POSITIVE' is tallied as negative.
    if labels[idx] == 'POSITIVE':
        label_counts = positive_counts
    else:
        label_counts = negative_counts

    # Splitting on a single space means runs of spaces produce '' tokens,
    # and those are counted like any other word.
    tokens = review.split(" ")
    label_counts.update(tokens)
    total_counts.update(tokens)

# Bare expressions: no effect when run as a script (presumably left over from
# an interactive session where the result is displayed).
negative_counts.most_common()
positive_counts.most_common()
# Positive-to-negative usage ratio for every word seen more than 100 times.
pos_neg_ratios = Counter()
for term, cnt in total_counts.most_common():
    if cnt <= 100:
        continue
    # The +1 keeps the denominator non-zero for words that never appear in a
    # negative review.
    pos_neg_ratios[term] = positive_counts[term] / float(negative_counts[term] + 1)

# Move the ratios onto a log scale: ratios above 1 become positive scores,
# the rest become negative scores. The 0.01 offset keeps the logarithm
# finite when the ratio is exactly 0.
for term, raw_ratio in pos_neg_ratios.most_common():
    pos_neg_ratios[term] = (
        np.log(raw_ratio) if raw_ratio > 1 else -np.log((1 / (raw_ratio + 0.01)))
    )

# Bare expressions: no effect when run as a script (presumably left over from
# an interactive session). The second one is the 30 lowest-scoring words,
# lowest first.
pos_neg_ratios.most_common()
pos_neg_ratios.most_common()[:-31:-1]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment