tripleee · February 9, 2021 11:51 · tripleee · Feb 9, 2021 · tripleee · Feb 9, 2021
diff --git a/rmhtml.py b/rmhtml.py
 from email import message_from_bytes
 from email.iterators import typed_subpart_iterator, body_line_iterator
 from email.policy import default


 def nontrivial(textpart):
    """
    Tell whether textpart's body has three or more lines.

    Lines are pulled one at a time from body_line_iterator(), so
    reading stops as soon as the third line has been seen.
    """
    remaining = body_line_iterator(textpart)
    for _ in range(3):
        # The iterator only ever yields strings, so None is a safe
        # "ran out of lines" marker.
        if next(remaining, None) is None:
            return False
    return True

 def scrubbed(message):
    """
    Drop the text/html alternative from message when a usable
    text/plain alternative exists.

    Each multipart/alternative container in message is examined in
    walk order. A container qualifies when its subtree holds a
    text/plain part for which nontrivial() is True and it has a
    text/html part as a direct child. The first such text/html child
    of the first qualifying container is removed in place.

    Returns True if a part was actually removed, otherwise False
    (message is then left untouched).
    """
    for container in typed_subpart_iterator(
            message, maintype='multipart', subtype='alternative'):
        # A defective "multipart" may carry a string payload; there is
        # nothing to remove from it.
        if not container.is_multipart():
            continue
        # Use explicit content-type checks instead of the truthiness of
        # the part objects: Message defines __len__ as the header count,
        # so a header-less text/plain part would evaluate as False.
        has_text = any(
            part.get_content_type() == 'text/plain' and nontrivial(part)
            for part in typed_subpart_iterator(container))
        if not has_text:
            continue
        # get_payload() returns the live list of sub-parts, so deleting
        # from it edits the container without touching private state.
        children = container.get_payload()
        for idx, child in enumerate(children):
            if child.get_content_type() == 'text/html':
                del children[idx]
                return True
        # No direct text/html child here (e.g. it is wrapped in a
        # multipart/related); do not claim success, try the next one.
    return False

 def killhtml_maybe(filename):
    """
    Read the email stored in filename and, if scrubbed() removed
    its text/html alternative, overwrite the file with the result.

    The file is left untouched when scrubbed() returns False.
    Errors from opening, reading, serializing or writing propagate
    to the caller.
    """
    with open(filename, 'rb') as handle:
        message = message_from_bytes(handle.read(), policy=default)
    if not scrubbed(message):
        return
    # Serialize before reopening: opening with 'wb' truncates the file
    # immediately, so a failure inside as_bytes() would otherwise leave
    # an empty file where the mail used to be.
    data = message.as_bytes()
    with open(filename, 'wb') as handle:
        handle.write(data)


 def main():
    """
    Command-line entry point: pass every argument after the script
    name to killhtml_maybe(), in the order given.
    """
    from sys import argv
    for path in argv[1:]:
        killhtml_maybe(path)


 # Run main() only when this file is executed as a script, not when
 # it is imported as a module.
 if __name__ == '__main__':
    main()
	from email import message_from_bytes
	from email.iterators import typed_subpart_iterator, body_line_iterator
	from email.policy import default


	def nontrivial(textpart):
	    """
	    Check that this part contains at least three lines of text.

	    Returns True as soon as a third line has been read from
	    body_line_iterator(textpart), otherwise False.
	    """
	    for idx, _ in enumerate(body_line_iterator(textpart)):
	        # idx is zero-based, so 2 means the third line was reached.
	        if idx == 2:
	            return True
	    return False

	def scrubbed(message):
	    """
	    Drop the text/html alternative from message when a usable
	    text/plain alternative exists.

	    Each multipart/alternative container in message is examined in
	    walk order. A container qualifies when its subtree holds a
	    text/plain part for which nontrivial() is True and it has a
	    text/html part as a direct child. The first such text/html child
	    of the first qualifying container is removed in place.

	    Returns True if a part was actually removed, otherwise False
	    (message is then left untouched).
	    """
	    for container in typed_subpart_iterator(
	            message, maintype='multipart', subtype='alternative'):
	        # A defective "multipart" may carry a string payload; there
	        # is nothing to remove from it.
	        if not container.is_multipart():
	            continue
	        # Use explicit content-type checks instead of the truthiness
	        # of the part objects: Message defines __len__ as the header
	        # count, so a header-less text/plain part would be False.
	        has_text = any(
	            part.get_content_type() == 'text/plain' and nontrivial(part)
	            for part in typed_subpart_iterator(container))
	        if not has_text:
	            continue
	        # get_payload() returns the live list of sub-parts, so deleting
	        # from it edits the container without touching private state.
	        children = container.get_payload()
	        for idx, child in enumerate(children):
	            if child.get_content_type() == 'text/html':
	                del children[idx]
	                return True
	        # No direct text/html child here; do not claim success, try
	        # the next container.
	    return False

	def killhtml_maybe(filename):
	    """
	    Read the email stored in filename and, if scrubbed() removed
	    its text/html alternative, overwrite the file with the result.

	    The file is left untouched when scrubbed() returns False.
	    Errors from opening, reading, serializing or writing propagate
	    to the caller.
	    """
	    with open(filename, 'rb') as handle:
	        message = message_from_bytes(handle.read(), policy=default)
	    if not scrubbed(message):
	        return
	    # Serialize before reopening: opening with 'wb' truncates the
	    # file immediately, so a failure inside as_bytes() would
	    # otherwise leave an empty file where the mail used to be.
	    data = message.as_bytes()
	    with open(filename, 'wb') as handle:
	        handle.write(data)


	def main():
	    """
	    Command-line entry point: pass every argument after the script
	    name to killhtml_maybe(), in the order given.
	    """
	    import sys
	    for filename in sys.argv[1:]:
	        killhtml_maybe(filename)


	# Run main() only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == '__main__':
	    main()
No results found