Created
December 7, 2018 04:13
-
-
Save anmolj7/0e3abef202c61c62013f155701530e95 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

import nltk
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk.sentiment.vader import SentimentIntensityAnalyzer
class Polarity:
    """Sentiment demo built on NLTK.

    Constructing an instance trains and evaluates a small Naive Bayes
    subjectivity classifier on the NLTK ``subjectivity`` corpus and prints
    the evaluation metrics. ``polarity`` scores a piece of text with the
    VADER ``SentimentIntensityAnalyzer``.
    """

    def __init__(self):
        """Train and evaluate the subjectivity classifier, printing its metrics.

        The trained classifier and the ``SentimentAnalyzer`` that owns the
        feature extractors are kept on the instance (``self.classifier`` and
        ``self.sentim_analyzer``) so they stay usable after construction.
        """
        n_instances = 100  # sentences taken from each category
        n_train = 80       # first 80 of each category train, the rest test

        subj_docs = [
            (sent, 'subj')
            for sent in subjectivity.sents(categories='subj')[:n_instances]
        ]
        obj_docs = [
            (sent, 'obj')
            for sent in subjectivity.sents(categories='obj')[:n_instances]
        ]

        training_docs = subj_docs[:n_train] + obj_docs[:n_train]
        testing_docs = (
            subj_docs[n_train:n_instances] + obj_docs[n_train:n_instances]
        )

        sentim_analyzer = SentimentAnalyzer()
        # Unigram features are drawn from the negation-marked training words,
        # keeping only words that occur at least 4 times.
        all_words_neg = sentim_analyzer.all_words(
            [mark_negation(doc) for doc in training_docs]
        )
        unigram_feats = sentim_analyzer.unigram_word_feats(
            all_words_neg, min_freq=4
        )
        sentim_analyzer.add_feat_extractor(
            extract_unigram_feats, unigrams=unigram_feats
        )

        training_set = sentim_analyzer.apply_features(training_docs)
        test_set = sentim_analyzer.apply_features(testing_docs)

        self.sentim_analyzer = sentim_analyzer
        self.classifier = sentim_analyzer.train(
            NaiveBayesClassifier.train, training_set
        )

        for key, value in sorted(sentim_analyzer.evaluate(test_set).items()):
            print("{0} : {1}".format(key, value))

    def preProcess(self, article_text):
        """Return ``article_text`` reduced to ASCII letters and single spaces.

        Every character that is not ``a-z`` or ``A-Z`` becomes a space, then
        each run of whitespace collapses to one space. Leading and trailing
        spaces are not stripped. The cleaned text is also printed.

        NOTE(review): this removes punctuation and apostrophes (``don't``
        becomes ``don t``) before the text reaches VADER, which may change
        the scores -- confirm this is intended.
        """
        formatted_article_text = re.sub(r'[^a-zA-Z]', ' ', article_text)
        formatted_article_text = re.sub(r'\s+', ' ', formatted_article_text)
        print(formatted_article_text)
        return formatted_article_text

    def polarity(self, text):
        """Print and return the VADER polarity scores for ``text``.

        The text is cleaned with ``preProcess`` first. Each score is printed
        as ``key:value`` in sorted key order, and the score dictionary from
        ``SentimentIntensityAnalyzer.polarity_scores`` is returned.
        """
        text = self.preProcess(text)
        # Build the VADER analyzer once per instance instead of on every call;
        # getattr keeps this working on instances created without __init__.
        sid = getattr(self, '_sid', None)
        if sid is None:
            sid = SentimentIntensityAnalyzer()
            self._sid = sid
        ss = sid.polarity_scores(text)
        for k in sorted(ss):
            print("{0}:{1}".format(k, ss[k]))
        return ss
# Demo run: constructing Polarity trains and evaluates the subjectivity
# classifier, then the polarity scores for one sample sentence are printed.
P = Polarity()
P.polarity('I really hate it when a book ends.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment