Created
March 31, 2020 12:27
-
-
Save asadamatic/d2cc3575ccb1a28ff6ee293b27d4db63 to your computer and use it in GitHub Desktop.
This spam message filter is based on a naive Bayes classifier.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-word counts used as the numerator of the spam probability in
# likelihood(). Only words listed here are scored by checkSpam().
spamMessage = {
    'million': 156,
    'dollars': 29,
    'adclick': 51,
    'conferences': 2,
}

# Per-word counts used as the numerator of the ham probability in
# likelihood(). Must contain every key present in spamMessage, otherwise
# likelihood() raises KeyError for the missing word.
hamMessage = {
    'million': 98,
    'dollars': 119,
    'adclick': 1,
    'conferences': 12,
}

# Denominators that turn the per-word counts above into probabilities.
spamWordCount = 95791
hamWordCount = 306438

# Module-level placeholders. likelihood() does not declare these as global,
# so it never modifies them.
probabilitySpam = 0.00
probabilityHam = 0.00
def likelihood(word):
    """Return the spam-to-ham likelihood ratio for *word*.

    The ratio is ``(spamMessage[word] / spamWordCount)`` divided by
    ``(hamMessage[word] / hamWordCount)``. A value above 1 means the word is
    relatively more frequent in the spam counts; below 1, in the ham counts.

    Raises:
        KeyError: if *word* is missing from ``spamMessage`` or ``hamMessage``.
        ZeroDivisionError: if the ham count for *word*, or either word total,
            is zero.
    """
    # Local names deliberately differ from the module-level
    # probabilitySpam/probabilityHam so nothing is shadowed.
    p_spam = spamMessage[word] / spamWordCount
    p_ham = hamMessage[word] / hamWordCount
    return p_spam / p_ham
# Likelihood-ratio product of the most recently checked message; 1 is the
# neutral starting value (no evidence either way).
baseRatio = 1
def checkSpam(message):
    """Classify *message* as spam or ham and print the verdict.

    Full stops are removed, the message is split on whitespace, and the
    likelihood ratio of every word that has an entry in ``spamMessage`` is
    multiplied together, starting from 1. Words without an entry are ignored.

    The ratios are computed with respect to spam, so a product of at least 1
    is reported as spam and a product below 1 as ham. A message with no known
    words keeps the neutral value 1 and is therefore reported as spam.

    The product is also stored in the module-level ``baseRatio`` so it can be
    inspected after the call.
    """
    global baseRatio

    # Start every message from the neutral ratio. Continuing from the
    # previous call's product would bias each later classification.
    ratio = 1
    for word in message.replace('.', '').split():
        if word in spamMessage:
            ratio = likelihood(word) * ratio

    # Publish the result only once the whole message has been scored.
    baseRatio = ratio

    if ratio < 1:  # Condition for a ham message
        print('The given message is not spam')
    else:  # Condition for a spam message
        print('The given message is spam')
# Demo run: classify a sample message, then show the ratio it produced.
checkSpam('million dollars question for you.')
print(baseRatio)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment