Last active
December 28, 2018 16:41
-
-
Save davidmintz/4eeb54f86a55bb351aa05c5f9db5bf06 to your computer and use it in GitHub Desktop.
Python 2.x script for downloading, archiving and removing emails from an IMAP folder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Download, store and delete old messages from a folder on an IMAP account.
Kind of crude in that you have to hard-code values that could be taken as
command-line options, and who knows what else is wrong with this, as it is my
first Python effort of any consequence.
"""
# to do: make python3 compatible? process command line options instead of hard-coding
import email
import email.utils
import imaplib
import logging
import mailbox
import os.path
import re
import sys
import time

log = logging

# cutoff year. anything older than 01-Jan of 'until_year' gets downloaded and deleted.
until_year = "2017"
# IMAP connection parameters
hostname = 'your_imap_host'
user = 'your_username'
password = 'your_password'
# name of folder to prune
folder_name = 'folder-to-prune'
# path to mbox files
mbox_path = "/path/to/your/email/archive"
# fallback regexp for digging a four-digit year (19xx/20xx) out of a Date
# header that the RFC 2822 date parser cannot make sense of
pattern = re.compile(r'\b(?:19|20)\d{2}\b')


def to_text(value):
    """Return *value* as text for display.

    Under Python 3 imaplib hands back bytes; under Python 2 ``bytes`` is
    ``str`` and the value is returned unchanged.
    """
    if isinstance(value, bytes) and not isinstance(value, str):
        return value.decode('ascii', 'replace')
    return value


def extract_year(date_header):
    """Return the year of an email Date header as a string, or None.

    The header is first handed to ``email.utils.parsedate_tz``; when that
    cannot parse it, the first 19xx/20xx token found by ``pattern`` is used.
    None is returned for a missing header or one without a recognisable year,
    so the caller can skip the message instead of crashing.
    """
    if not date_header:
        return None
    text = str(date_header)
    try:
        parsed = email.utils.parsedate_tz(text)
    except (TypeError, ValueError, IndexError, OverflowError):
        # be defensive: treat any parser hiccup on junk input as "unparseable"
        parsed = None
    if parsed is not None:
        return "{:04d}".format(parsed[0])
    found = pattern.search(text)
    return found.group() if found else None


def fetch_with_retry(connection, num, retries=5, delay=5):
    """Fetch message *num* in RFC822 form, retrying when the fetch raises.

    connection -- object with an imaplib-style ``fetch(num, spec)`` method
    num        -- message sequence number as returned by ``search``
    retries    -- number of additional attempts after the first failure
    delay      -- seconds to sleep before each retry

    Returns the list of response parts on success, or None when the server
    answered with a non-OK status or every attempt raised. The caller must
    skip the message on None.
    """
    last_error = None
    for attempt in range(retries + 1):
        if attempt:
            # try again
            log.info("sleeping for a few seconds")
            print("exception caught, trying again...")
            time.sleep(delay)
        try:
            response, msg = connection.fetch(num, '(RFC822)')
        except (imaplib.IMAP4.error, EnvironmentError) as err:
            # keep a reference: Python 3 unbinds 'err' when the handler ends
            last_error = err
            if attempt:
                log.warning("shit failed again at retry#{}".format(attempt))
            else:
                log.warning("caught exception fetching message {}, will try again".format(to_text(num)))
            continue
        if response == "OK":
            if attempt:
                log.info("retry #{} worked, moving on".format(attempt))
            return msg
        # a non-OK reply is the server's answer, not a transient glitch
        log.warning("server answered {} fetching message {}, skipping".format(response, to_text(num)))
        return None
    print("shit is not working: {}".format(last_error))
    print("reluctantly moving on")
    return None


def archive_folder(connection):
    """Archive and flag for deletion every message older than ``until_year``.

    Selects ``folder_name`` on the logged-in *connection*, walks all messages,
    appends each one dated before the cutoff to the mbox file named after its
    year inside ``mbox_path`` and then sets its ``\\Deleted`` flag. Messages
    that cannot be fetched or have no usable Date header are left alone.

    The open mbox is always closed (and thereby flushed) before returning, so
    the archive is on disk before the caller expunges anything.

    Returns the number of messages flagged. Raises RuntimeError when the
    folder cannot be selected.
    """
    status, data = connection.select(folder_name)
    if status != "OK":
        raise RuntimeError("cannot select folder {}: {}".format(folder_name, data))
    num_messages = to_text(data[0])
    log.info("{} messages in the damn box".format(num_messages))
    status, message_numbers_string = connection.search(None, 'ALL')
    nums = (message_numbers_string[0] or b'').split()

    cutoff = int(until_year)
    # imaplib yields bytes on Python 3, plain str on Python 2
    parse_message = getattr(email, 'message_from_bytes', email.message_from_string)
    mbox = None
    mbox_file = None  # path of the mbox currently open
    deleted = 0
    # http://stackoverflow.com/questions/29432981/saving-imap-messages-with-python-mailbox-module
    try:
        for n in nums:
            msg = fetch_with_retry(connection, n)
            if msg is None:
                # never fall through with a stale or missing payload
                continue
            # "Returned data are tuples of message part envelope and data"
            for response_part in msg:
                if not isinstance(response_part, tuple):
                    continue
                raw = response_part[1]
                year = extract_year(parse_message(raw)['date'])
                sys.stdout.write('year: {}, processing: {} of {}\r'.format(year, to_text(n), num_messages))
                sys.stdout.flush()
                if year is None:
                    log.warning("no usable Date header on message {}, leaving it on the server".format(to_text(n)))
                    continue
                if int(year) >= cutoff:
                    continue
                path = os.path.join(mbox_path, year)
                # compare full paths; 'is not None' because an empty mbox is falsy
                if path != mbox_file:
                    if mbox is not None:
                        mbox.close()
                    if not os.path.isfile(path):
                        log.info("creating mbox: "+year)
                    mbox = mailbox.mbox(path)
                    mbox_file = path
                # archive first, flag for deletion second
                mbox.add(raw)
                connection.store(n, '+FLAGS', '\\Deleted')
                deleted += 1
    finally:
        if mbox is not None:
            mbox.close()
    return deleted


def main():
    """Run the archive job with the hard-coded settings; return the exit code."""
    log.basicConfig(filename="mail-archive.log",format='%(asctime)s %(levelname)s: %(message)s',datefmt="%Y-%m-%d %I:%M:%S %p",level=logging.DEBUG)
    log.info('starting. cutoff is '+until_year)
    if not os.path.isdir(mbox_path):
        # fail before touching the mailbox rather than halfway through
        log.error("mbox directory {} does not exist".format(mbox_path))
        return 1
    # NOTE(review): imaplib.IMAP4 is an unencrypted connection, so the login
    # travels in clear text. Consider imaplib.IMAP4_SSL if the server offers it.
    connection = imaplib.IMAP4(hostname)
    connection.login(user, password)
    try:
        deleted = archive_folder(connection)
        log.info("deleted {} messages".format(deleted))
        log.info("expunging messages")
        connection.expunge()
        log.info("closing connection, logging out")
        connection.close()
    finally:
        connection.logout()
    log.info("done")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment