rohitdholakia · December 30, 2011 06:54
diff --git a/classification.py b/classification.py
 '''Given a mail, split it on spaces, then add up the log probability of each word. Multiply the sum by the spam probability. Do the same thing for non-spam.
   Whichever score is higher wins. Let's start.
 '''
 import sys,os
 def makeDict(f):
 	'''Read a word/log-probability table from the file at path f.

 	Each non-blank line is split on single spaces; the first field becomes
 	the key and the second field is stored as the raw string value (a
 	trailing newline, if present, is kept, so callers strip it before
 	converting to float).

 	f -- path of the file to read.
 	Returns a dict mapping first field -> second field (both str).
 	Raises IndexError if a non-blank line has no second field.
 	'''
 	temp = {}
 	# The with-block closes the file even if a malformed line raises.
 	with open(f,'r') as data:
 		for line in data:
 			# Skip blank lines (e.g. a trailing empty line) instead of
 			# crashing with IndexError on the missing second field.
 			if not line.strip():
 				continue
 			prob = line.split(" ")
 			temp[prob[0]] = prob[1]
 	return temp
 # Load the per-word log-probability tables for each class.
 spamProbs = makeDict(sys.argv[1]) #Pass the spam log probs here
 nonspamProbs = makeDict(sys.argv[2]) #Pass the non-spam log probs here

 # Each line of the input file is scored and reported on its own.
 # The with-block guarantees the file is closed when the loop ends.
 with open(sys.argv[3],'r') as toClassify:
 	for line in toClassify:
 		# Drop the line terminator first; otherwise the last word keeps its
 		# "\n" and can never match a key in either table.
 		words = line.rstrip("\r\n").split(" ")

 		spamP = 0
 		nonspamP = 0
 		for w in words:
 			# A word missing from the table (KeyError) or stored with an
 			# unparsable value (ValueError) contributes a fixed +1 instead.
 			# NOTE(review): +1 is the original fallback; confirm it is the
 			# intended score for unseen words.
 			try:
 				spamP = spamP + float(spamProbs[w].strip("\n"))
 			except (KeyError, ValueError):
 				spamP = spamP + 1
 			try:
 				nonspamP = nonspamP + float(nonspamProbs[w].strip("\n"))
 			except (KeyError, ValueError):
 				nonspamP = nonspamP + 1

 		# Both sums are scaled by the same 0.5 factor, so the scaling does
 		# not change which of the two totals is larger.
 		totalSpamP = spamP * 0.5
 		totalnonSpamP = nonspamP * 0.5

 		# Single-argument print(...) behaves the same on Python 2 and 3.
 		if totalSpamP > totalnonSpamP:
 			print('This mail is spam')
 		else:
 			print('This mail is not spam')
	'''Given a mail, split it on spaces, then add up the log probability of each word. Multiply the sum by the spam probability. Do the same thing for non-spam.
	Whichever score is higher wins. Let's start.
	'''
	import sys,os
	def makeDict(f):
		'''Read a word/log-probability table from the file at path f.

		Each non-blank line is split on single spaces; the first field becomes
		the key and the second field is stored as the raw string value (a
		trailing newline, if present, is kept, so callers strip it before
		converting to float).

		f -- path of the file to read.
		Returns a dict mapping first field -> second field (both str).
		Raises IndexError if a non-blank line has no second field.
		'''
		temp = {}
		# The with-block closes the file even if a malformed line raises.
		with open(f,'r') as data:
			for line in data:
				# Skip blank lines (e.g. a trailing empty line) instead of
				# crashing with IndexError on the missing second field.
				if not line.strip():
					continue
				prob = line.split(" ")
				temp[prob[0]] = prob[1]
		return temp
	# Load the per-word log-probability tables for each class.
	spamProbs = makeDict(sys.argv[1]) #Pass the spam log probs here
	nonspamProbs = makeDict(sys.argv[2]) #Pass the non-spam log probs here

	# Each line of the input file is scored and reported on its own.
	# The with-block guarantees the file is closed when the loop ends.
	with open(sys.argv[3],'r') as toClassify:
		for line in toClassify:
			# Drop the line terminator first; otherwise the last word keeps its
			# "\n" and can never match a key in either table.
			words = line.rstrip("\r\n").split(" ")

			spamP = 0
			nonspamP = 0
			for w in words:
				# A word missing from the table (KeyError) or stored with an
				# unparsable value (ValueError) contributes a fixed +1 instead.
				# NOTE(review): +1 is the original fallback; confirm it is the
				# intended score for unseen words.
				try:
					spamP = spamP + float(spamProbs[w].strip("\n"))
				except (KeyError, ValueError):
					spamP = spamP + 1
				try:
					nonspamP = nonspamP + float(nonspamProbs[w].strip("\n"))
				except (KeyError, ValueError):
					nonspamP = nonspamP + 1

			# Both sums are scaled by the same 0.5 factor, so the scaling does
			# not change which of the two totals is larger.
			totalSpamP = spamP * 0.5
			totalnonSpamP = nonspamP * 0.5

			# Single-argument print(...) behaves the same on Python 2 and 3.
			if totalSpamP > totalnonSpamP:
				print('This mail is spam')
			else:
				print('This mail is not spam')
No results found