Created
April 20, 2013 09:14
-
-
Save Cediddi/5425343 to your computer and use it in GitHub Desktop.
Spam Spam Spam!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hint shown as soon as the module is executed or imported.
# The parenthesised single-argument form prints the same text whether it
# is parsed as a Python 2 print statement or a Python 3 function call.
print("Call 'instructions()' to get more info")
def instructions(delay=0.7):
    """
    Print the usage walkthrough one line at a time.

    delay -- seconds to pause after each printed line. Defaults to 0.7,
             the value that used to be hard-coded; pass 0 to print the
             whole text without pausing.

    Returns None; the text is written to standard output.
    """
    from time import sleep
    # Raw string: the IMAP folder flags (\HasNoChildren, \Junk, \Drafts)
    # contain backslashes that must be printed literally rather than being
    # read as (unrecognised) escape sequences.
    instructionsdata = r""" IMPORTANT!
Please be sure that you have these libs:
nltk, unidecode
Example Usage and Output:
>>> inboX = MailFactory()
>>> inboX.new_server("[email protected]","yourpassword")
(\HasNoChildren) "/" "INBOX"
(\HasChildren \HasNoChildren \Junk) "/" "[Gmail]/Spam"
(\HasChildren \HasNoChildren \Drafts) "/" "[Gmail]/Taslaklar"
>>> inboX.fetch("INBOX",50)
>>> inboX.prettifier()
>>> spaM = MailFactory()
>>> spaM.new_server("[email protected]","yourpassword")
(\HasNoChildren) "/" "INBOX"
(\HasChildren \HasNoChildren \Junk) "/" "[Gmail]/Spam"
(\HasChildren \HasNoChildren \Drafts) "/" "[Gmail]/Taslaklar"
>>> spaM.fetch("[Gmail]/Spam",50)
>>> spaM.prettifier()
>>> doggY = SpamDog()
>>> doggY.new_spamdog()
"Your dog's name is Casey. It's a 42 years old Foxhound."
>>> doggY.trainer(inboX.pretty,"good")
>>> doggY.trainer(spaM.pretty,"bad")
>>> doggY.sniffer(inboX.pretty[5:]
Casey is whining, this is a good mail
Casey is whining, this is a good mail
Casey is barking, this isn't looking good
Casey is whining, this is a good mail
Casey forgot how to dog."
>>> doggY.sniffer(spaM.pretty[5:]
Casey is barking, this isn't looking good
Casey is barking, this isn't looking good
Casey forgot how to dog."
Casey is barking, this isn't looking good
Casey is barking, this isn't looking good
"""
    for line in instructionsdata.split("\n"):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(line)
        if delay > 0:
            sleep(delay)
class MailFactory:
    """
    The MailFactory gives you a chance to get your mails

    Intended call order: new_server() -> fetch() -> prettifier().
    Attributes created along the way:
      server  -- logged-in imaplib.IMAP4_SSL connection (new_server)
      m_index -- message ids returned by the IMAP search (fetch)
      fetched -- raw message sources, one per downloaded mail (fetch)
      pretty  -- cleaned-up plain-text bodies (prettifier)
    """
    def new_server(self, username, password):
        """
        Creates 'Factory.server' and prints available folders

        username, password -- credentials passed to IMAP4_SSL.login()
        """
        import imaplib
        self.server = imaplib.IMAP4_SSL('imap.googlemail.com')  # Creating an imap client
        self.server.login(username, password)                   # Logging in to imap server
        for folder in self.server.list()[1]:                    # Prints available folders
            print(folder)

    def fetch(self, mailbox, number):
        """
        Fetches the mailbox then creates 'Factory.fetched'

        mailbox -- name of the folder to select, e.g. "INBOX"
        number  -- maximum number of mails to download; the first
                   `number` ids of the search result are used

        A mail whose FETCH response is not OK, or that carries no
        (envelope, body) tuple, is reported and skipped instead of
        aborting the whole download.
        """
        self.server.select(mailbox)                          # Selecting the mailbox
        self.fetched = list()                                # Fetched raw mails
        result, data = self.server.search(None, "ALL")       # Getting all items in the mailbox
        # data[0] may be None or empty when the search produced no ids;
        # treat that, and any non-OK status, as an empty mailbox.
        if result == "OK" and data and data[0]:
            self.m_index = data[0].split()                   # Splitting ids into the mail index
        else:
            self.m_index = list()
        for i in self.m_index[:number]:
            status, response = self.server.fetch(i, "(RFC822)")
            # A usable response starts with an (envelope, raw message) tuple.
            if status != "OK" or not response or not isinstance(response[0], tuple):
                print("Mail %s could not be fetched" % i)
                continue
            self.fetched.append(response[0][1])              # Keeping only the raw message

    def prettifier(self):
        """
        Prettifies the fetched data and creates 'Factory.pretty'

        For every fetched mail, each non-multipart part whose subtype is
        'plain' is quoted-printable decoded twice, decoded as
        windows-1254, stripped of HTML tags, HTML-unescaped and
        transliterated with unidecode. Results that contain no space are
        assumed to be base64 bodies and are dropped. A part that fails
        anywhere in the clean-up is reported and skipped.
        """
        import email
        import nltk
        import HTMLParser
        from unidecode import unidecode
        parser = HTMLParser.HTMLParser()
        self.pretty = list()                                 # Prettified mail bodies
        for mail in self.fetched:
            msg = email.message_from_string(mail)            # Creating an email
            for part in msg.walk():
                # Only plain body parts are wanted.
                if part.get_content_maintype() == 'multipart':
                    continue
                if part.get_content_subtype() != 'plain':
                    continue
                qpm = email.quoprimime                       # Quoted Printable decoder
                # Double decode, kept from the original to work around
                # mails that were encoded twice.
                payload = qpm.decodestring(qpm.decodestring(part.get_payload()))
                try:
                    text = payload.decode("windows-1254")    # De facto charset of the mails handled here
                    text = nltk.clean_html(text)             # Removes tags
                    text = parser.unescape(text)             # Parses escaped html characters
                    decoded = unidecode(unicode(text))       # Transliterates to avoid turkish characters
                    if " " in decoded:                       # No space at all -> base64 encoded mail
                        self.pretty.append(decoded)
                    else:
                        print("A Base64 Mail Ignored")
                except Exception as e:
                    # Best effort: report why the part is ignored and go on.
                    print("%s %s" % (e, part.get_content_charset()))
class SpamDog:
    """
    The SpamDog Class gives you a chance to adapt a K9

    Wraps a `docclass.naivebayes` classifier (stored as `self.K9`) behind
    a dog persona. Call new_spamdog() first; trainer() and sniffer() rely
    on the attributes it creates.
    """
    def new_spamdog(self):
        """
        Hires a dog from local K9 breeder

        Creates `self.K9` plus a random `nameof`, `ageof` and `breedof`,
        then prints a one-line introduction.
        """
        from random import choice, randint
        import docclass
        self.K9 = docclass.naivebayes(docclass.getwords)
        # Lovely dog names. The list is kept exactly as published,
        # including the repeated entries.
        names = ["Max", "Bella", "Buddy", "Molly", "Rocky", "Lucy", "Bailey", "Maggie", "Jake", "Daisy",
                 "Charlie", "Sophie", "Jack", "Sadie", "Toby", "Chloe", "Cody", "Bailey", "Buster", "Lola", "Duke",
                 "Zoe", "Cooper", "Abby", "Riley", "Ginger", "Harley", "Roxy", "Bear", "Gracie", "Tucker", "Coco",
                 "Murphy", "Sasha", "Lucky", "Lily", "Oliver", "Angel", "Sam", "Princess", "Oscar", "Emma", "Teddy",
                 "Annie", "Winston", "Rosie", "Sammy", "Ruby", "Rusty", "Lady", "Shadow", "Missy", "Gizmo", "Lilly",
                 "Bentley", "Mia", "Zeus", "Katie", "Jackson", "Zoey", "Baxter", "Madison", "Bandit", "Stella", "Gus",
                 "Penny", "Samson", "Belle", "Milo", "Casey", "Rudy", "Samantha", "Louie", "Holly", "Hunter", "Lexi",
                 "Casey", "Lulu", "Rocco", "Brandy", "Sparky", "Jasmine", "Joey", "Shelby", "Bruno", "Sandy", "Beau",
                 "Roxie", "Dakota", "Pepper", "Maximus", "Heidi", "Romeo", "Luna", "Boomer", "Dixie", "Luke", "Honey",
                 "Henry", "Dakota"]
        # Ideal K9 breeds
        breeds = ["AustralianShepherd", "Beagle", "BelgianMalinois", "BassetHound", "EnglishCockerSpaniel",
                  "EnglishSpringerSpaniel", "Foxhound", "GermanShepherd", "LabradorRetriever", "SulimovDog",
                  "Schnauzer", "Weimaraner"]
        self.nameof = choice(names)       # Chooses a name for your SpamDog
        self.ageof = randint(7, 92)       # Chooses an age in dog years
        self.breedof = choice(breeds)     # Chooses a breed for your SpamDog
        print("Your dog's name is %s. It's a %d years old %s." % (self.nameof, self.ageof, self.breedof))

    def trainer(self, maillist, karma):
        """
        Trains The SpamDog with maillist you give and karma you want

        maillist -- iterable of mail body strings
        karma    -- category label such as "good" or "bad"; lower-cased
                    before it is handed to the classifier
        """
        import re
        for mail in maillist:
            # Every newline, carriage return or run of spaces becomes one space.
            refined_mail = re.sub("\n|\r| +", " ", mail)
            self.K9.train(refined_mail, karma.lower())

    def sniffer(self, maillist):
        """
        Lets The SpamDog to sniff the stuff and bark or whine.

        maillist -- iterable of mail body strings; one verdict line is
                    printed per mail.
        """
        self.K9.setthreshold("bad", 3.0)  # Threshold for the "bad" category
        for mail in maillist:
            stat = self.K9.classify(mail)  # Sniffs the mail
            if stat == "good":
                print("%s is whining, this is a good mail" % (self.nameof))
            elif stat == "bad":
                print("%s is barking, this isn't looking good" % (self.nameof))
            elif stat == "unknown":
                print("%s needs more training, Not enough data" % (self.nameof))
            elif stat is None:
                # NOTE(review): presumably classify() returns None when no
                # category wins clearly - confirm against docclass.
                # The percent signs are doubled so they print literally; the
                # unescaped "%100 " used to be parsed as a format spec and
                # raised instead of printing this line.
                print("%s forgot how to dog. It might be %%100 good or %%100 bad" % (self.nameof))
            else:
                print("I also forgot how to throw exception")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment