debonx · December 11, 2018 09:34
diff --git a/naive_bayes_classifier_ex1.py b/naive_bayes_classifier_ex1.py
 # REQUIREMENTS
 # - A tagged dataset is necessary to calculate the probabilities used in Bayes' Theorem.
 # - In order to apply Bayes' Theorem, we assume that these features are independent.
 # - Using Bayes' Theorem, we can find P(class|data point) for every possible class. The class with the highest probability will be the algorithm’s prediction.

 # MORE Improvements for the Natural Language Processing.
 # - Remove punctuation from the training set (great! into great)
 # - Lowercase every word in the training set (Great into great)
 # - Use a bigram or trigram model, which makes the independence assumption more reasonable ("this is" or "is great" instead of single words)

 from reviews import baby_counter, baby_training, instant_video_counter, instant_video_training, video_game_counter, video_game_training
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.naive_bayes import MultinomialNB

 # Text whose sentiment is scored by each of the three per-category models.
 review = "Having sex is good"

 # Turn the review into a count vector once per category, using the
 # vectorizers imported from `reviews` (presumably each was fitted on its own
 # category's corpus -- confirm in reviews.py).
 baby_review_counts = baby_counter.transform([review])
 instant_video_review_counts = instant_video_counter.transform([review])
 video_game_review_counts = video_game_counter.transform([review])

 # The training data ships without labels. Rows are expected to be ordered as
 # 1000 negative reviews followed by 1000 positive ones, so build a separate
 # 0/1 label list for every category.
 baby_labels, instant_video_labels, video_game_labels = (
     [0] * 1000 + [1] * 1000 for _ in range(3)
 )

 # Create and train one Multinomial Naive Bayes model per category;
 # fit(data points, data labels) hands back the fitted estimator itself.
 baby_classifier = MultinomialNB().fit(baby_training, baby_labels)
 instant_video_classifier = MultinomialNB().fit(instant_video_training, instant_video_labels)
 video_game_classifier = MultinomialNB().fit(video_game_training, video_game_labels)

 # Report, for each category, the predicted probabilities that the review is
 # bad (label 0) or good (label 1), in the same order as before.
 for heading, model, counts in (
     ("Baby training set: ", baby_classifier, baby_review_counts),
     ("Amazon Instant Video training set: ", instant_video_classifier, instant_video_review_counts),
     ("Video Games training set: ", video_game_classifier, video_game_review_counts),
 ):
     print(heading + str(model.predict_proba(counts)))
	# REQUIREMENTS
	# - A tagged dataset is necessary to calculate the probabilities used in Bayes' Theorem.
	# - In order to apply Bayes' Theorem, we assume that these features are independent.
	# - Using Bayes' Theorem, we can find P(class|data point) for every possible class. The class with the highest probability will be the algorithm’s prediction.

	# MORE Improvements for the Natural Language Processing.
	# - Remove punctuation from the training set (great! into great)
	# - Lowercase every word in the training set (Great into great)
	# - Use a bigram or trigram model, which makes the independence assumption more reasonable ("this is" or "is great" instead of single words)

	from reviews import baby_counter, baby_training, instant_video_counter, instant_video_training, video_game_counter, video_game_training
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.naive_bayes import MultinomialNB

	# Sample review to classify with every model below.
	review = "Having sex is good"

	# Vectorize the review with each category's counter (imported from `reviews`)
	# so it can be fed to the classifier trained on that category.
	baby_review_counts = baby_counter.transform([review])
	instant_video_review_counts = instant_video_counter.transform([review])
	video_game_review_counts = video_game_counter.transform([review])

	# The datasets carry no labels of their own: the first 1000 data points are
	# taken to be negative (0) and the second 1000 positive (1). Each category
	# gets its own copy of that label list.
	negative_then_positive = [0] * 1000 + [1] * 1000
	baby_labels = list(negative_then_positive)
	instant_video_labels = list(negative_then_positive)
	video_game_labels = list(negative_then_positive)

	# Baby products model: create, then fit with (data points, data labels).
	baby_classifier = MultinomialNB()
	baby_classifier.fit(baby_training, baby_labels)

	# Amazon Instant Video model.
	instant_video_classifier = MultinomialNB()
	instant_video_classifier.fit(instant_video_training, instant_video_labels)

	# Video Games model.
	video_game_classifier = MultinomialNB()
	video_game_classifier.fit(video_game_training, video_game_labels)

	# Print the probabilities (via .predict_proba) that the review is bad or good
	# according to each model; sep="" keeps the output identical to concatenation.
	print("Baby training set: ", baby_classifier.predict_proba(baby_review_counts), sep="")
	print("Amazon Instant Video training set: ", instant_video_classifier.predict_proba(instant_video_review_counts), sep="")
	print("Video Games training set: ", video_game_classifier.predict_proba(video_game_review_counts), sep="")