balachandrapai · March 11, 2018 18:21
diff --git a/TextClassify.py b/TextClassify.py
 ##This script does binary text classification: every document gets one of
 ##two labels (e.g. spam / not spam; here positive / negative movie reviews).

 import nltk
 import random
 from nltk.corpus import movie_reviews
 import pickle

 ## Create a list of tuples or features

 ## Collect one (token_list, category) pair per review file, category by category.
 documents = []
 for category in movie_reviews.categories():
     documents.extend(
         (list(movie_reviews.words(fileid)), category)
         for fileid in movie_reviews.fileids(category)
     )

 ## Shuffle in place: the pairs above are grouped by category, and the
 ## train/test split further down is a plain positional slice.
 random.shuffle(documents)

 ##print(documents[1])

 ##Count how often each lower-cased token occurs across the whole corpus
 all_words = nltk.FreqDist(w.lower() for w in movie_reviews.words())

 ##Vocabulary: the 3,000 most frequent words.
 ##FreqDist.keys() is not ordered by frequency (FreqDist is a Counter
 ##subclass), so slicing it would only give the first 3,000 distinct words
 ##encountered; most_common() returns (word, count) pairs sorted by
 ##decreasing count.
 ##NOTE: a classifier pickled with the old feature list must be retrained.
 word_features = [word for word, _count in all_words.most_common(3000)]

 ##Map every feature word to True/False depending on its presence in the document
 def find_features(document):
     """Return a {word: bool} dict of which feature words occur in *document*.

     document -- iterable of word tokens; tokens are matched exactly as
     given, so the comparison is case-sensitive.

     The keys are the entries of the module-level ``word_features`` list.
     """
     present = set(document)  # built once so each lookup is a set membership test
     return {word: (word in present) for word in word_features}

 ##A True value only means that the feature word occurs in the given review
 ##print((find_features(movie_reviews.words('neg/cv000_29416.txt'))))

 ##Turn every labelled document into a (feature_dict, category) pair
 featuresets = [(find_features(rev), category) for (rev, category) in documents]

 ##The first 1,900 feature sets are for training, the remainder for testing
 training_set = featuresets[:1900]
 testing_set = featuresets[1900:]

 ##To (re)build the saved model, train and pickle it once:
 ##    classifier = nltk.NaiveBayesClassifier.train(training_set)
 ##    with open("naivebayes.pickle", "wb") as save_classifier:
 ##        pickle.dump(classifier, save_classifier)

 ##Load the previously saved classifier; the context manager closes the file
 ##even if unpickling raises.
 ##NOTE: pickle.load can execute arbitrary code - only load a trusted file.
 ##NOTE(review): documents are reshuffled on every run, so testing_set
 ##presumably overlaps the data the pickled model was trained on; treat the
 ##reported accuracy as optimistic - TODO confirm.
 with open("naivebayes.pickle", "rb") as classifier_f:
     classifier = pickle.load(classifier_f)

 print("Classifier accuracy percent:",(nltk.classify.accuracy(classifier, testing_set))*100)

 classifier.show_most_informative_features(15)
	##This script does binary text classification: every document gets one of
	##two labels (e.g. spam / not spam; here positive / negative movie reviews).

	import nltk
	import random
	from nltk.corpus import movie_reviews
	import pickle

	## Create a list of tuples or features

	## Collect one (token_list, category) pair per review file, category by category.
	documents = []
	for category in movie_reviews.categories():
		documents.extend(
			(list(movie_reviews.words(fileid)), category)
			for fileid in movie_reviews.fileids(category)
		)

	## Shuffle in place: the pairs above are grouped by category, and the
	## train/test split further down is a plain positional slice.
	random.shuffle(documents)

	##print(documents[1])

	##Count how often each lower-cased token occurs across the whole corpus
	all_words = nltk.FreqDist(w.lower() for w in movie_reviews.words())

	##Vocabulary: the 3,000 most frequent words.
	##FreqDist.keys() is not ordered by frequency (FreqDist is a Counter
	##subclass), so slicing it would only give the first 3,000 distinct words
	##encountered; most_common() returns (word, count) pairs sorted by
	##decreasing count.
	##NOTE: a classifier pickled with the old feature list must be retrained.
	word_features = [word for word, _count in all_words.most_common(3000)]

	##Map every feature word to True/False depending on its presence in the document
	def find_features(document):
		"""Return a {word: bool} dict of which feature words occur in *document*.

		document -- iterable of word tokens; tokens are matched exactly as
		given, so the comparison is case-sensitive.

		The keys are the entries of the module-level ``word_features`` list.
		"""
		present = set(document)  # built once so each lookup is a set membership test
		return {word: (word in present) for word in word_features}

	##A True value only means that the feature word occurs in the given review
	##print((find_features(movie_reviews.words('neg/cv000_29416.txt'))))

	##Turn every labelled document into a (feature_dict, category) pair
	featuresets = [(find_features(rev), category) for (rev, category) in documents]

	##The first 1,900 feature sets are for training, the remainder for testing
	training_set = featuresets[:1900]
	testing_set = featuresets[1900:]

	##To (re)build the saved model, train and pickle it once:
	##    classifier = nltk.NaiveBayesClassifier.train(training_set)
	##    with open("naivebayes.pickle", "wb") as save_classifier:
	##        pickle.dump(classifier, save_classifier)

	##Load the previously saved classifier; the context manager closes the file
	##even if unpickling raises.
	##NOTE: pickle.load can execute arbitrary code - only load a trusted file.
	##NOTE(review): documents are reshuffled on every run, so testing_set
	##presumably overlaps the data the pickled model was trained on; treat the
	##reported accuracy as optimistic - TODO confirm.
	with open("naivebayes.pickle", "rb") as classifier_f:
		classifier = pickle.load(classifier_f)

	print("Classifier accuracy percent:",(nltk.classify.accuracy(classifier, testing_set))*100)

	classifier.show_most_informative_features(15)