Created
September 13, 2010 12:43
-
-
Save akaihola/577229 to your computer and use it in GitHub Desktop.
This is a script I used to clean up the havoc Thunderbird caused by copying thousands of messages from INBOX as five copies to a folder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Helpers for removing duplicate messages in an IMAP mailbox | |
Dependency: IMAPClient (easy_install IMAPClient) | |
Author: Antti Kaihola <[email protected]> | |
License: New BSD license | |
""" | |
import collections | |
import imapclient | |
class IMAPDeDupper(object):
    """Find duplicate messages in the currently selected IMAP folder.

    Wraps a client object (the example in this file passes an
    ``imapclient.IMAPClient``) and groups messages by a fingerprint
    built from each message's INTERNALDATE and ENVELOPE data.
    """

    def __init__(self, client):
        """Store ``client``; it must provide ``search()`` and ``fetch()``."""
        self.client = client

    def get_all_messages(self):
        """Fetches INTERNALDATE and ENVELOPE for all messages found by search

        Return value: the result of ``client.fetch`` (used by the rest of
        this class as a dict mapping message UID to message data), or an
        empty dict when the search finds no messages.
        """
        uids = self.client.search()
        if not uids:
            # Nothing to fetch; don't issue a FETCH for an empty message set.
            return {}
        return self.client.fetch(uids, ('internaldate', 'envelope',))

    def make_fingerprint(self, msg):
        """Returns the fingerprint used to recognise copies of a message

        The fingerprint is a 2-tuple of the message's ``INTERNALDATE``
        value and the first nine items of its ``ENVELOPE`` value.
        """
        # NOTE(review): this assumes the fetch result uses native ``str``
        # keys; confirm against the IMAPClient version in use.
        return msg['INTERNALDATE'], msg['ENVELOPE'][:9]

    def group_messages_by_fingerprint(self):
        """Returns a dict grouping duplicate messages in current folder

        Message fingerprints as returned by ``make_fingerprint`` are
        used as dict keys, and lists of message UIDs as dict values.
        """
        msgs = self.get_all_messages()
        grouped = collections.defaultdict(list)
        # ``items()`` instead of ``iteritems()``: works on Python 2 and 3.
        for uid, msg in msgs.items():
            grouped[self.make_fingerprint(msg)].append(uid)
        return grouped

    def get_duplicates(self):
        """Returns UIDs of duplicate and unique messages in the current folder

        Return value: a 2-tuple of lists ``([<duplicates>], [<uniques>])``

        For sets of duplicate messages, the first message is not
        included in the lists, and following messages are added to
        the ``duplicates`` list.
        """
        grouped = self.group_messages_by_fingerprint()
        uniques = []
        duplicates = []
        for uids in grouped.values():
            if len(uids) > 1:
                # Keep the first copy, report the rest as duplicates.
                duplicates.extend(uids[1:])
            else:
                uniques.append(uids[0])
        return duplicates, uniques

    def get_matching(self, fingerprints):
        """Finds matching messages in the current folder

        Given a list (or dict with keys) of message fingerprints,
        searches the current folder for messages matching any of the
        fingerprints.  Nothing is deleted; the caller decides what to
        do with the returned UIDs.

        Return value: a 2-tuple of UID lists
        ``([<matching>], [<nonmatching>])``
        """
        if not isinstance(fingerprints, (dict, set, frozenset)):
            # Hash a plain sequence once so each lookup below is O(1)
            # instead of a linear scan per message group.
            try:
                fingerprints = set(fingerprints)
            except TypeError:
                # Unhashable entries: fall back to the given container.
                pass
        grouped = self.group_messages_by_fingerprint()
        matching = []
        nonmatching = []
        for fingerprint, uids in grouped.items():
            if fingerprint in fingerprints:
                matching.extend(uids)
            else:
                nonmatching.extend(uids)
        return matching, nonmatching
def example():
    """Demonstrates removing duplicates with ``IMAPDeDupper``

    First deletes duplicate messages inside folder ``2009``, then deletes
    from ``INBOX`` every message that also exists in folder ``2009``.

    The host name and credentials are placeholders; adapt them before
    running.  Messages are deleted and expunged for real.
    """
    c = imapclient.IMAPClient('imap.mydomain.com')
    c.login('login', 'password')
    try:
        d = IMAPDeDupper(c)

        # delete duplicate messages in folder ``2009``
        c.select_folder('2009')
        dups, _uniques = d.get_duplicates()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('In folder "2009", deleting message UIDS:\n%s' % repr(dups))
        if dups:
            c.delete_messages(dups)
        c.expunge()

        # delete inbox messages which exist in folder ``2009``
        c.select_folder('2009')
        twoten = d.group_messages_by_fingerprint()
        c.select_folder('INBOX')
        dups, _uniqs = d.get_matching(twoten)
        print('In the inbox, deleting message UIDS:\n%s' % repr(dups))
        if dups:
            c.delete_messages(dups)
        c.expunge()
    finally:
        # Always end the session, even if one of the steps above failed.
        c.logout()
# Script entry point. Running this module directly currently does nothing;
# ``example()`` is not called from here.
if __name__ == '__main__':
    pass
    # TODO: implement command line interface
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment