Last active
January 22, 2018 13:23
-
-
Save fbriere/e86584a807449e3128c0 to your computer and use it in GitHub Desktop.
Remove all binary attachments from email messages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# vim: set fileencoding=utf-8: | |
# Remove all binary attachments from email messages | |
# | |
# This is a standard UNIX filter; it reads a message or mailbox from standard
# input, and outputs it again on standard output, with the body of every
# binary attachment removed and the emptied part wrapped in a
# message/external-body part.
# | |
# Written by Frédéric Brière <[email protected]>. Copy at will. | |
import email | |
import email.message | |
import optparse | |
import sys | |
import time | |
# Adapted from <http://code.activestate.com/recipes/576553/>, originally | |
# written by Romain Dartigues. | |
def messages_list(input):
    """Yield the messages of an mbox mailbox, one at a time.

    This parses an mbox mailbox read from the file-like object `input` and
    yields each message as an email.message.Message object.  Unlike the
    mailbox module, this allows reading from standard input.

    Despite its name, this is a generator and not a list; messages are
    produced lazily as the input is read.

    The "From " separator line that starts each message is passed to the
    parser together with the message, so it is kept as the message's
    unixfrom (see Message.get_unixfrom()) instead of being thrown away.
    Anything found before the first "From " line is yielded as a message of
    its own.
    """
    lines = []
    # iter() with a sentinel keeps calling readline() until it returns ''
    # (end of input), which avoids any read-ahead on an interactive stdin.
    for line in iter(input.readline, ''):
        if line.startswith('From ') and lines:
            # A new separator: what was accumulated so far is one complete
            # message.
            yield email.message_from_string(''.join(lines))
            lines = []
        # The separator itself is kept as the first line of the next message.
        lines.append(line)
    if lines:
        yield email.message_from_string(''.join(lines))
def gut_message(message):
    """Empty a message's body and enclose it in a message/external-body.

    The payload of `message` is replaced in place by an empty string (its
    headers are left untouched).  A new email.message.Message is returned
    whose Content-Type is message/external-body and whose sole payload item
    is the emptied `message`.  The Content-Type parameters record:

      access-type  always 'x-spam-deleted'
      expiration   the current time, as formatted by time.strftime()
      size         the length of the payload, as returned by get_payload(),
                   before it was emptied
    """
    # Measure the payload before it is discarded below.
    original_size = str(len(message.get_payload()))
    timestamp = time.strftime("%a, %d %b %Y %H:%M:%S %z")
    message.set_payload('')

    envelope = email.message.Message()
    # add_header() turns underscores in keyword names into dashes, so
    # access_type ends up as the "access-type" parameter.
    envelope.add_header(
        'Content-Type', 'message/external-body',
        access_type='x-spam-deleted',
        expiration=timestamp,
        size=original_size)
    envelope.set_payload([message])
    return envelope
def message_is_binary(message):
    """Tell whether a non-multipart message is of a binary type.

    A message counts as binary when its main content type is anything other
    than 'text' or 'message'.
    """
    textual_maintypes = ('text', 'message')
    maintype = message.get_content_maintype()
    return maintype not in textual_maintypes
def clean_message(message):
    """Strip every binary part out of a message.

    Multipart messages are walked recursively and their list of parts is
    updated in place; each binary leaf is swapped for the wrapper produced
    by gut_message().  The cleaned message is returned for convenience; this
    is the same object that was passed in, unless `message` is itself a
    binary leaf, in which case the new wrapper is returned.
    """
    if not message.is_multipart():
        if message_is_binary(message):
            # TODO: Don't gut if this is the topmost message
            return gut_message(message)
        return message

    # Don't recurse in already-deleted attachments
    if message.get_content_type() == 'message/external-body':
        return message

    children = message.get_payload()
    # Slice assignment keeps the very list object the message holds.
    children[:] = [clean_message(child) for child in children]
    return message
def main():
    """Filter one message, or an mbox mailbox, from stdin to stdout.

    Without options, the whole of standard input is parsed as a single
    message.  With --mbox, standard input is split into messages by
    messages_list().  Each message is passed through clean_message() and
    written to standard output with its "From " envelope line.
    """
    parser = optparse.OptionParser(usage='Usage: %prog [ --mbox ]')
    parser.add_option('--mbox', action='store_true', default=False,
                      help='Input is in mbox format')
    # Positional arguments are accepted but ignored.
    options, _args = parser.parse_args()

    if options.mbox:
        messages = messages_list(sys.stdin)
    else:
        messages = [email.message_from_string(sys.stdin.read())]

    for message in messages:
        # Written with sys.stdout.write() instead of the print statement,
        # which is a syntax error on Python 3; the trailing newline
        # reproduces what print appended.
        cleaned = clean_message(message)
        sys.stdout.write(cleaned.as_string(unixfrom=True) + '\n')
if __name__ == '__main__': | |
main() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment