Created
April 2, 2012 06:54
-
-
Save Lambdanaut/2281311 to your computer and use it in GitHub Desktop.
A Naive Bayesian Classifier. Train the Classifier class with labelled examples, e.g. "classifier.train(True, 'I am so happy!')" or "classifier.train(False, 'I am a sad man. ')". To check whether a message falls in the True or the False category, call "classifier.classify(message)".
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/python2 | |
| from __future__ import division | |
| from ignoreWords import * | |
class Classifier:
    """Naive Bayesian classifier that sorts messages into True or False.

    Training examples are stored as ``(category, words)`` tuples in
    ``self.examples``, where ``words`` is the cleaned word list produced by
    ``cleanMessage``.  Typical use::

        classifier = Classifier()
        classifier.train(True, 'I am so happy!')
        classifier.train(False, 'I am a sad man. ')
        classifier.classify('so happy')
    """

    # Custom stop words; None means "use the module-level ignoreWords list".
    # Kept as a class attribute so instances built without __init__ still work.
    _stopWords = None

    def __init__(self, stopWords=None):
        """Create an untrained classifier.

        stopWords -- optional iterable of lower-case words to drop while
                     cleaning messages.  When omitted, the module-level
                     ``ignoreWords`` list is used.
        """
        self.examples = []
        if stopWords is not None:
            self._stopWords = frozenset(stopWords)

    def classify(self, message):
        """Return True if message scores higher for True than for False.

        Prints a notice and returns None when nothing has been trained yet.
        """
        if not self.examples:
            print("You need to give examples before a classification can occur! ")
            return None
        return self.chanceOfCategory(True, message) > self.chanceOfCategory(False, message)

    def chanceOfCategory(self, category, message):
        """Return the score of ``category`` given the words of ``message``.

        The score is P(category) times the product of P(word | category),
        divided by the product of P(word), taken over the message words that
        occur in at least one example of ``category``.  Returns 0.0 when
        there are no training examples at all.
        """
        examples = self.getExamples()
        if not examples:
            # Nothing trained: avoid dividing by a zero example count.
            return 0.0
        categoryExamples = self.getExamples(category=category)

        # Floats keep every division below a true division even if the
        # module's ``from __future__ import division`` is not in effect.
        examplesLen = float(len(examples))
        categoryExamplesLen = float(len(categoryExamples))

        words = self.zoneInOnCategory(category, self.cleanMessage(message))

        probOfCategory = categoryExamplesLen / examplesLen
        probOfWords = 1.0
        for word in words:
            # Numerator: share of this category's examples containing word.
            inCategory = sum(1 for example in categoryExamples if word in example)
            if inCategory > 0:
                probOfCategory *= inCategory / categoryExamplesLen
            # Denominator: share of all examples containing word.
            inAll = sum(1 for example in examples if word in example)
            if inAll > 0:
                probOfWords *= inAll / examplesLen
        return probOfCategory / probOfWords

    def cleanMessage(self, message):
        """Return the lower-cased words of ``message`` minus stop words.

        The characters ``. , ? !`` are removed first.  Words are split on
        any run of whitespace, so repeated spaces, tabs or an empty message
        never yield empty-string "words".
        """
        stopWords = ignoreWords if self._stopWords is None else self._stopWords
        for mark in ".,?!":
            message = message.replace(mark, "")
        return [word for word in message.lower().split() if word not in stopWords]

    def zoneInOnCategory(self, category, cleanMessage):
        """Keep only the words that occur in some example of ``category``."""
        vocabulary = set(
            word
            for example in self.getExamples(category=category)
            for word in example
        )
        return [word for word in cleanMessage if word in vocabulary]

    def getExamples(self, category=None):
        """Return the word lists of all examples, or of one category.

        Always returns a real list (not a lazy iterator), so callers can
        take its length and iterate over it repeatedly.
        """
        if category is None:
            return [words for _, words in self.examples]
        return [words for label, words in self.examples if label == category]

    def train(self, category, message):
        """Record ``message`` as a training example of ``category``."""
        self.examples.append((category, self.cleanMessage(message)))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Lower-case stop words that the classifier drops from every message before
# training or classifying.  Legacy misspellings ('amoungst', 'fify') are kept
# so that nothing previously ignored stops being ignored; the correctly
# spelled 'fifty' is listed alongside 'fify'.
ignoreWords = [
    'a', 'about', 'above', 'across', 'after', 'afterwards',
    'again', 'against', 'all', 'almost', 'alone', 'along',
    'already', 'also', 'although', 'always', 'am', 'among',
    'amongst', 'amoungst', 'amount', 'an', 'and', 'another',
    'any', 'anyhow', 'anyone', 'anything', 'anyway', 'anywhere',
    'are', 'around', 'as', 'at', 'back', 'be',
    'became', 'because', 'become', 'becomes', 'becoming', 'been',
    'before', 'beforehand', 'behind', 'being', 'below', 'beside',
    'besides', 'between', 'beyond', 'bill', 'both', 'bottom',
    'but', 'by', 'call', 'can', 'cannot', 'cant', 'dont',
    'co', 'computer', 'con', 'could', 'couldnt', 'cry',
    'de', 'describe', 'detail', 'do', 'done', 'down',
    'due', 'during', 'each', 'eg', 'eight', 'either',
    'eleven', 'else', 'elsewhere', 'empty', 'enough', 'etc', 'even', 'ever', 'every',
    'everyone', 'everything', 'everywhere', 'except', 'few', 'fifteen',
    'fifty', 'fify', 'fill', 'find', 'fire', 'first', 'five',
    'for', 'former', 'formerly', 'forty', 'found', 'four',
    'from', 'front', 'full', 'further', 'get', 'give',
    'go', 'had', 'has', 'hasnt', 'have', 'he',
    'hence', 'her', 'here', 'hereafter', 'hereby', 'herein',
    'hereupon', 'hers', 'herself', 'him', 'himself', 'his',
    'how', 'however', 'hundred', 'i', 'ie', 'if',
    'in', 'inc', 'indeed', 'interest', 'into', 'is',
    'it', 'its', 'itself', 'keep', 'last', 'latter',
    'latterly', 'least', 'less', 'ltd', 'made', 'many',
    'may', 'me', 'meanwhile', 'might', 'mill', 'mine',
    'more', 'moreover', 'most', 'mostly', 'move', 'much',
    'must', 'my', 'myself', 'name', 'namely', 'neither',
    'never', 'nevertheless', 'next', 'nine', 'no', 'nobody',
    'none', 'noone', 'nor', 'not', 'nothing', 'now',
    'nowhere', 'of', 'off', 'often', 'on', 'once',
    'one', 'only', 'onto', 'or', 'other', 'others',
    'otherwise', 'our', 'ours', 'ourselves', 'out', 'over',
    'own', 'part', 'per', 'perhaps', 'please', 'put',
    'rather', 're', 'same', 'see', 'seem', 'seemed',
    'seeming', 'seems', 'serious', 'several', 'she', 'should',
    'show', 'side', 'since', 'sincere', 'six', 'sixty',
    'so', 'some', 'somehow', 'someone', 'something', 'sometime',
    'sometimes', 'somewhere', 'still', 'such', 'system', 'take',
    'ten', 'than', 'that', 'the', 'their', 'them',
    'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby',
    'therefore', 'therein', 'thereupon', 'these', 'they', 'thick',
    'thin', 'third', 'this', 'those', 'though', 'three',
    'through', 'throughout', 'thru', 'thus', 'to', 'together',
    'too', 'top', 'toward', 'towards', 'twelve', 'twenty',
    'two', 'un', 'under', 'until', 'up', 'upon',
    'us', 'very', 'via', 'was', 'we', 'well',
    'were', 'what', 'whatever', 'when', 'whence', 'whenever',
    'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon',
    'wherever', 'whether', 'which', 'while', 'whither', 'who',
    'whoever', 'whole', 'whom', 'whose', 'why', 'will',
    'with', 'within', 'without', 'would', 'yet', 'you', 'your', 'yours',
    'yourself', 'yourselves',
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment