Created
December 20, 2022 23:07
-
-
Save OscarL/6bbe63baf1b683e2c7e5b4d93d37c2e3 to your computer and use it in GitHub Desktop.
Split Haikuporter's .patchset files ("git am" mailboxes).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! python3 | |
""" | |
patchset_split.py | |
Based on Paolo Bonzini's mbox_split.py: | |
https://gist.github.com/bonzini/d5bc1946475487167c529f9699e39512 | |
""" | |
import argparse | |
import email.parser | |
import email.header | |
import os | |
import re | |
import sys | |
# Paolo Bonzini's original version expected a "Subject:" line like this:
#     "Subject: [ 2/2 ] webbrowser: Support for default browsers on Haiku"
# We don't rely on that "[ N/M ]" counter: output files are numbered
# incrementally, in the order the patches appear in the mailbox.
def subj_to_name(subj):
    """Convert a subject to a filename.

    The subject is unfolded (a newline followed by whitespace becomes a
    single space), an optional leading "[...]" tag or "word:" prefix is
    split off as the *area* ("misc" when there is none), and both parts are
    reduced to filename-safe text.

    Args:
        subj: The subject line as a str.

    Returns:
        A name of the form "<area>-<text>.patch".
    """

    def dashify(text):
        # Anything outside [a-zA-Z0-9_-] becomes a dash, runs of dashes are
        # collapsed, and leading/trailing dashes are trimmed.
        text = re.sub(r"[^a-zA-Z0-9_-]", "-", text)
        text = re.sub(r"-{2,}", "-", text)
        return text.strip("-")

    # Unfold continuation lines. The fourth positional argument of re.sub()
    # is `count`, not `flags`, so no flag is passed here (none is needed:
    # the pattern contains no ".").
    subj = re.sub(r"\n\s+", " ", subj)

    # Optional prefix: either a bracketed tag or a "token:" area marker.
    m = re.match(r"""\s* ( \[ [^]]* \] | \S+: )?""", subj, re.X)
    area = "misc"
    if m and m.group(1):
        area = dashify(m.group(1))
        subj = subj[m.end():]

    text = dashify(subj.strip())
    return "%s-%s.patch" % (area, text)
def has_patch(body):
    """Return whether the body includes a patch.

    Three shapes are recognised: a unified diff ("---" / "+++" / "@@"),
    a git binary patch, and a git mode-only change.

    Args:
        body: The message body as bytes.

    Returns:
        A truthy match object when a patch is found, otherwise None.
    """
    # The pattern is compiled with re.X, which discards unescaped whitespace
    # inside the pattern. Literal spaces that must be matched are therefore
    # written as "[ ]"; otherwise "GIT binary patch" would be compiled as
    # "GITbinarypatch" and never match.
    pattern = rb"""
          ^---.* ^\+\+\+.* ^@@
        | ^diff.* ^index.* ^GIT[ ]binary[ ]patch
        | ^diff.* ^old[ ]mode[ ].* ^new[ ]mode
    """
    return re.search(pattern, body, re.M | re.S | re.X)
def header_to_string(v):
    """Decode a MIME encoded header value.

    The value is split into (bytes, charset) chunks and reassembled into an
    email.header.Header object; formatting it with str() or "%s" gives the
    decoded Unicode text.
    """
    decoded_chunks = email.header.decode_header(v)
    return email.header.make_header(decoded_chunks)
def do_single(msg, num, output_dir, outfile=None):
    """Write one message as a cleaned-up patch.

    Only the From, Subject, Date and Content-Type headers are kept, and the
    body is written with its content-transfer-encoding decoded. The result
    goes to *outfile* when given, otherwise to a file under *output_dir*
    whose name is derived from *num* and the subject; in that case the file
    name is printed to stdout.

    Reads the module-level ``args`` namespace: ``args.keep_cr`` controls
    CRLF normalisation and ``args.nopatch`` disables the "must contain a
    patch" filter.

    Args:
        msg: The parsed email message.
        num: Sequence number used as the "NN-" prefix of the file name.
        output_dir: Directory in which the output file is created.
        outfile: Optional binary stream to write to instead of a new file.
            It is flushed but left open; closing it is the caller's job.
    """
    container = msg.get_payload(0) if msg.is_multipart() else msg
    body = container.get_payload(decode=True)
    if body is None:
        # get_payload(decode=True) yields None when the part is itself
        # multipart; treat that as an empty body instead of crashing.
        body = b""
    if not args.keep_cr:
        body = body.replace(b"\r\n", b"\n")
    if not args.nopatch and not has_patch(body):
        return

    def write_message(stream):
        for k in ("From", "Subject", "Date", "Content-Type"):
            if k in msg:
                stream.write(("%s: %s\n" % (k, header_to_string(msg[k]))).encode())
        stream.write(b"\n")
        stream.write(body)

    if outfile is not None:
        write_message(outfile)
        # Do not close a stream we did not open (e.g. sys.stdout.buffer).
        outfile.flush()
        return

    # A message without a Subject header still gets a usable file name.
    subject = str(msg["Subject"] or "")
    name = "%02d-%s" % (num, subj_to_name(subject))
    name = os.path.join(output_dir, name)
    print(name)
    with open(name, "wb") as f:
        write_message(f)
def split_mbox(filename, output_dir):
    """Split an mbox file and hand each message to do_single().

    A line starting with "From " ends the current message. Blank lines
    before a message starts are skipped, and a leading ">" is removed from
    lines starting with ">From". Messages are numbered from 1 in the order
    they appear in the file.
    """

    def emit(feed_parser, index):
        # Finish parsing the accumulated lines and write the message out.
        do_single(feed_parser.close(), num=index, output_dir=output_dir)

    count = 0
    current = None
    with open(filename, "rb") as mailbox:
        for raw in mailbox:
            if raw.startswith(b"From "):
                # Separator line: flush whatever message was in progress.
                if current is not None:
                    emit(current, count)
                    current = None
                continue

            if current is None:
                if not raw.strip():
                    # Blank line between the separator and the headers.
                    continue
                current = email.parser.BytesFeedParser()
                count += 1

            if raw.startswith(b">From"):
                raw = raw[1:]
            current.feed(raw)

        if current is not None:
            # Last message in the file has no trailing separator.
            emit(current, count)
if __name__ == '__main__':
    # Command-line entry point. `args` is deliberately left as a module-level
    # name because do_single() reads args.keep_cr and args.nopatch from it.
    parser = argparse.ArgumentParser(
        description="Splits a given mailbox into separate patch files"
    )
    parser.add_argument(
        "--nopatch",
        action="store_true",
        default=False,
        help="exports even if it's not a patch",
    )
    parser.add_argument(
        "--single",
        action="store_true",
        default=False,
        help="do not split mbox file",
    )
    parser.add_argument(
        "--keep-cr",
        action="store_true",
        default=False,
        help=r"do not remove \r from lines ending with \r\n",
    )
    parser.add_argument(
        "mbox",
        metavar="<mailbox file>",
        help='specifies the mailbox file',
    )
    parser.add_argument(
        "output_dir",
        metavar="<output_dir>",
        default='.',
        nargs='?',
        help='place output files under this directory',
    )
    args = parser.parse_args()
    args.output_dir = os.path.abspath(args.output_dir)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir, exist_ok=True)

    if args.single:
        # Treat the whole input as one message and write it to stdout.
        with open(args.mbox, "rb") as infile:
            msg = email.parser.BytesParser().parse(infile)
        # Keyword arguments: passed positionally, output_dir would land in
        # `num` and the stream in `output_dir`, leaving `outfile` unset.
        do_single(
            msg,
            num=1,
            output_dir=args.output_dir,
            outfile=sys.stdout.buffer,
        )
    else:
        split_mbox(args.mbox, args.output_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment