Created
November 3, 2011 14:19
-
-
Save zhasm/1336591 to your computer and use it in GitHub Desktop.
Get unread Gmail messages and print their Subject, From, To, and Message-ID headers without modifying the messages' flags.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import re | |
import imaplib2 as imaplib | |
from email import Header | |
from email.parser import Parser | |
##USER CONFIG
HOST = 'imap.gmail.com'
PORT = '993'  # stored as a string; convert with int() before use
USER = ''  # account name passed to login()
PAWD = ''  # password passed to login()
HAM_F = 'INBOX'  # folder that is scanned
SPAM_F = '0_learn_spam'  # '[Gmail]/Spam' by default
LIMIT = 20  # how many mails to read each time.

##Filter config. you need to know some regex
# Maps a header name to the compiled pattern searched against that header's
# decoded value; a hit on any single header classifies the message as spam.
# The To/From/Subject patterns use verbose mode (?x): whitespace inside them
# is ignored and "#" starts a comment that runs to the end of the line.
filters = {
    "To": re.compile(r"""(?xi)
        msn\d{3,8}\@fanfou\.com                 |
        (?:coderex|evernote)\@zhasm\.com
    """),
    "From": re.compile(r"""(?xi)
        vipcareer\.net                          |
        calendar-notification\@google\.com      |
        Tmall\@newsletter\.mail\.taobao\.com    |
        # NOTE(review): one sender address was here, but the captured copy of
        # this file only contains the web page's "[email protected]"
        # placeholder. Left in the pattern, that text is a character class
        # matching nearly every From header, so it is disabled until the real
        # address is restored (add it above this comment, followed by a bar).
        培训
    """),
    # Case-sensitive on purpose: no (?i) flag here.
    "Subject": re.compile(r"""(?x)
        (AD)        |
        \(AD\)      |
        ^\.$
    """),
    "Message-ID": re.compile(r"\.ru>$"),
}
## system config | |
# Compiled once at import time instead of on every call.
_UID_PATTERN = re.compile(r'UID (\d+)')


def GetUID(s):
    """Extract the message UID from the leading part of a FETCH response.

    ``s`` is the text that precedes the literal in a FETCH response item;
    the digits following the first ``UID `` in it are returned as a string.
    When no UID can be found (or ``s`` is not a string at all) an error line
    is printed and the empty string is returned, so callers can test the
    result for truthiness.
    """
    if isinstance(s, bytes) and not isinstance(s, str):
        # Only true on Python 3, where IMAP responses arrive as bytes while
        # the pattern above is text.
        s = s.decode('ascii', 'replace')
    try:
        found = _UID_PATTERN.search(s)
    except TypeError:
        # Not string-like (e.g. None from a malformed response).
        found = None
    if found is None:
        print("Error! no UID found in %r" % (s,))
        return ""
    return found.group(1)
def Filter(headers):
    """Report whether a message looks like spam.

    ``headers`` maps header names to their decoded values. Each value is
    searched with the pattern stored under the same name in the module-level
    ``filters`` table, and the first hit settles the verdict. A single
    matching header is enough: aggressive, but simple to reason about.

    Returns True for spam, False otherwise.
    """
    return any(
        filters[name].search(value) for name, value in headers.items()
    )
def DecodeSingleHeader(s):
    """Decode a raw (possibly RFC 2047 encoded) header value to plain text.

    The value is split into its encoded-word parts, each part is decoded
    with its declared charset, and the parts are joined with single spaces.
    The result is a native ``str``: UTF-8 encoded bytes on Python 2, text on
    Python 3.

    Malformed bytes are replaced rather than raised on, and an unknown
    charset name falls back to latin-1, so one badly formed header (common
    in spam) cannot abort the whole run.
    """
    pieces = []
    # "str is bytes" holds only on Python 2.
    native_is_bytes = str is bytes
    for text, charset in Header.decode_header(s):
        # Python 2: only parts with a declared charset are re-coded; parts
        # without one are passed through untouched, as before.
        # Python 3: every bytes part must become text before joining.
        if isinstance(text, bytes) and (charset or not native_is_bytes):
            try:
                text = text.decode(charset or 'latin-1', 'replace')
            except LookupError:
                # Charset name Python does not know; latin-1 never fails.
                text = text.decode('latin-1')
            if native_is_bytes:
                text = text.encode('utf8')
        pieces.append(text)
    return " ".join(pieces)
def StringToHeaders(s):
    """Parse raw header text into a message object.

    The returned object supports lookup by header name.
    """
    header_parser = Parser()
    return header_parser.parsestr(s)
def TrimFrom(s):
    """Normalise a raw "From" header value before it is decoded.

    A quote squeezed between the end of an encoded word and the opening
    angle bracket of the address is turned into a space, then every
    remaining single or double quote is dropped. Only needed for "From".
    """
    spaced = re.sub(r'''\?=["']<''', '?= <', s)
    return spaced.replace('"', '').replace("'", '')
def MarkUnread(imap, uids):
    """Mark messages as unread by removing their Seen flag.

    ``imap`` is the connection object (its ``uid`` method is used) and
    ``uids`` is a list of UID strings. Nothing is sent when the list is
    empty. The status and data of the STORE response are printed; the
    function returns None.
    """
    if not uids:
        return
    uid_set = ','.join(uids)
    print("marking unread %s" % uid_set)
    # Raw string: backslash-S is not a valid escape sequence, so the plain
    # literal only worked by accident and warns on newer interpreters.
    typ, data = imap.uid('STORE', uid_set, '-FLAGS', r'\SEEN')
    print("%s %s" % (typ, data))
def Move(imap, ham_f, spam_f, uids):
    """Move messages out of the selected folder into the spam label.

    Implementation: add the ``spam_f`` label to the messages, then flag
    them as deleted in the selected folder and expunge.

    ``imap`` is the connection object, ``uids`` a list of UID strings.
    ``ham_f`` is accepted for interface compatibility but not used. Nothing
    is sent when ``uids`` is empty. Failures are printed, not raised: the
    move is best-effort.
    """
    if not uids:
        # An empty UID set would produce an invalid STORE command.
        return
    label = '"%s"' % spam_f
    uid_set = ",".join(uids)
    print("moving spams: %s" % uid_set)
    try:
        typ, data = imap.uid('STORE', uid_set, '+X-GM-LABELS', label)
        print(data)
        if typ == 'OK':
            # Only remove the originals once the label has been applied.
            # Raw string: backslash-D is not a valid escape sequence.
            typ, data = imap.uid('STORE', uid_set, '+FLAGS', r'(\Deleted)')
            print(data)
            imap.expunge()
    except Exception as e:
        # Deliberately broad: report the problem and let the caller go on.
        print(e)
# Script body: scan up to LIMIT messages of HAM_F, print the four headers of
# each one, and move every message that trips a filter to SPAM_F.
# PORT is configured as a string, hence the int().
g = imaplib.IMAP4_SSL(HOST, int(PORT))
try:
    try:
        r, info = g.login(USER, PAWD)
    except Exception as e:
        # Nothing below can work without a session, so stop here instead of
        # failing later on select().
        print(str(e))
        raise SystemExit(1)
    g.select(HAM_F)
    # Search for 'UNSEEN' instead of 'ALL' to scan unread mail only.
    typ, msg_ids = g.search(None, 'ALL')
    spam = []
    ham = []
    if typ == 'OK':
        # split() without an argument yields [] for an empty mailbox, where
        # split(' ') would yield [''] and trigger a bogus fetch.
        for msg_id in msg_ids[0].split()[:LIMIT]:
            # BODY.PEEK reads the headers without setting the Seen flag, so
            # the messages' flags are left exactly as they were.
            r, msg = g.fetch(
                msg_id,
                '(UID BODY.PEEK[HEADER.FIELDS (SUBJECT FROM TO MESSAGE-ID)])')
            if r != 'OK' or not msg or not isinstance(msg[0], tuple):
                continue
            uid = GetUID(msg[0][0])
            if not uid:
                # Without a UID the message cannot be addressed later.
                continue
            h = StringToHeaders(msg[0][1])
            header = {}
            for name in ('From', 'To', 'Subject', 'Message-ID'):
                if name in h:
                    v = h[name]
                    if name == 'From':
                        v = TrimFrom(v)
                    header[name] = DecodeSingleHeader(v)
            if Filter(header):
                spam.append(uid)
                print("SPAM:")
            else:
                ham.append(uid)
                print("HAM:")
            for k, v in header.items():
                print("%s :  %s" % (k, v))
        if spam:
            Move(g, HAM_F, SPAM_F, spam)
        # No MarkUnread(g, ham) here: the PEEK fetch never marks anything as
        # read, and clearing Seen on every scanned message would also flip
        # mail the user had already read.
finally:
    # Always close the session, even when something above failed.
    g.logout()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment