Last active
May 29, 2017 19:58
-
-
Save davidfraser/caf71b3b60da117f79b4df5ccf1d398b to your computer and use it in GitHub Desktop.
A simple script to remove superfluous entries from a mbsync internal .journal file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""This is a script for internal use when doing large mail migrations with mbsync.

See http://isync.sourceforge.net/mbsync.html for more information on the product.

If IMAP disconnects occur on a large mailbox, the internal .journal file that tracks progress
can grow very large, though much of the information in it is not needed.
This script can be used to reduce the size of that file.

NB: This should be used with caution, and never while mbsync is running.
It does not replace the file, but can be used to produce a new journal file,
which can be moved into place after the old one is backed up.
"""
import sys | |
def filter_journal(infile):
    """Yield the lines of a journal, dropping superseded duplicate entries.

    A line is treated as a tracked entry when its first character is one of
    ``#``, ``&``, ``<``, ``>``, ``~``, backslash, ``/``, ``*`` or ``-``.  The
    first whitespace-separated token found from the third character onward
    is used as the entry's message id.  When several lines share the same
    (message id, command character) pair, only the last of them is kept.

    Every other line is passed through unchanged and in its original
    position: blank lines, lines starting with any other character, and
    tracked-command lines that carry no message id.

    Args:
        infile: A file-like object whose ``readlines()`` returns the journal
            lines as strings.

    Yields:
        The surviving lines, in their original order.
    """
    lines = infile.readlines()
    tracked_cmds = '#&<>~\\/*-'
    # (msg_id, cmd) -> index of the most recent line seen for that pair.
    last_seen = {}
    exclude_lines = set()
    for n, line in enumerate(lines):
        if not line.strip():
            continue
        cmd = line[0]
        if cmd not in tracked_cmds:
            continue
        fields = line[2:].split()
        if not fields:
            # A command character with no id after it (e.g. a line cut short):
            # keep the line as-is instead of failing with IndexError.
            continue
        key = (fields[0], cmd)
        previous = last_seen.get(key)
        if previous is not None:
            # A later line for the same pair supersedes the earlier one.
            exclude_lines.add(previous)
        last_seen[key] = n
    for n, line in enumerate(lines):
        if n not in exclude_lines:
            yield line
if __name__ == '__main__':
    # Filter the journal read from standard input and write the surviving
    # lines to standard output; the input file itself is never modified.
    filtered_lines = filter_journal(sys.stdin)
    sys.stdout.writelines(filtered_lines)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment