Created
April 30, 2019 11:59
-
-
Save KrissN/dc012474c0c41e3bcd2776096543c9e5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
from email.parser import Parser | |
import re | |
import sys | |
# Separator lines recognised as the first line of a quoted-message header.
known_quote_header_marks = (
    "-----Original Message-----",
    "_____________________________________________",
    "-----Original Appointment-----",
)

# A "Name: value" header line; one optional space may precede the colon.
headerre = re.compile(r"^(\w+) ?: (.*)$")

# Accepted spellings of the sender and date header names.
from_words = ("From", "Von", "De")
sent_words = ("Sent", "Gesendet", "Envoyé")


def detect_quote_header(lines):
    """Detect a quoted-message header starting at ``lines[0]``.

    The accepted shape is:

    * optionally one line from ``known_quote_header_marks``, itself
      optionally followed by a single blank line;
    * a sender line whose name is in ``from_words``;
    * directly below it, a date line whose name is in ``sent_words``
      (a blank line between the two is not tolerated);
    * any number of further non-blank lines;
    * at least one blank line that terminates the header.

    Args:
        lines: Sequence of text lines to inspect. Only a header that begins
            on the very first line is recognised.

    Returns:
        ``None`` when no header is recognised. Otherwise a tuple
        ``(sender, date, end_idx)`` with the stripped header values and the
        index in ``lines`` of the first non-blank line after the header. If
        only blank lines follow the header, ``end_idx`` is the index of the
        first terminating blank line.
    """
    # marker + blank + sender + date is the longest prefix indexed below, so
    # four lines are enough to keep every access in range.
    if len(lines) < 4:
        return None

    m = headerre.match(lines[0])
    if m is not None and m.group(1) in from_words:
        idx = 0
    elif lines[0].strip() in known_quote_header_marks:
        idx = 1
        # One blank line may separate the marker from the sender line.
        if not lines[idx].strip():
            idx += 1
    else:
        return None

    m = headerre.match(lines[idx])
    if m is None or m.group(1) not in from_words:
        return None
    sender = m.group(2)

    sent_idx = idx + 1
    m = headerre.match(lines[sent_idx])
    if m is None or m.group(1) not in sent_words:
        return None
    date = m.group(2)

    # The header runs until the first blank line; without one inside the
    # given lines this is not treated as a header.
    end_idx = None
    for i in range(sent_idx, len(lines)):
        if not lines[i].strip():
            end_idx = i
            break
    if end_idx is None:
        return None

    # Skip the blank separator line(s) so end_idx points at the quoted body.
    for i in range(end_idx, len(lines)):
        if lines[i].strip():
            end_idx = i
            break

    return (sender.strip(), date.strip(), end_idx)
def get_message_text(message):
    """Return the charset and decoded text of a non-multipart message part.

    Args:
        message: A parsed e-mail message (or sub-part) whose payload is a
            single body rather than a list of sub-parts.

    Returns:
        A tuple ``(charset, text)``. ``charset`` is the value of the
        Content-Type ``charset`` parameter, or ``'ascii'`` when the header or
        the parameter is missing. ``text`` is the payload with its
        Content-Transfer-Encoding undone and decoded using ``charset``.

    Raises:
        LookupError: If the declared charset is unknown to Python.
        UnicodeDecodeError: If the payload is not valid in that charset.
    """
    charset = 'ascii'
    # get_params() returns its failobj when there is no Content-Type header
    # at all, so pass an empty tuple to keep the loop safe.
    for key, val in message.get_params(failobj=()):
        if key == 'charset':
            charset = val
    # Decode with the charset that was found, not with whichever parameter
    # happened to come last in the header (e.g. "format=flowed").
    return (charset, message.get_payload(decode=True).decode(charset))
# Deepest quote level at which nested quote headers are still searched for.
MAX_LEVEL = 5


def requote(lines, level):
    """Convert header-delimited quoted mail text into ``>``-prefixed quoting.

    The lines before the first detected quote header belong to the current
    level. Everything after that header is an older message, quoted one
    level deeper. The older message is emitted first, introduced by an
    "On <date> <sender> wrote:" line, and the current level's own lines
    follow it.

    Args:
        lines: The message text as a list of lines without line terminators.
        level: Number of ``>`` characters used to prefix this level's lines.

    Returns:
        The re-quoted text as one string, each output line ending in ``\\n``.
    """
    prefix = ">" * level
    own_lines = []
    pieces = []

    for pos, current in enumerate(lines):
        # The header detector only ever needs a short look-ahead window.
        header = detect_quote_header(lines[pos:pos + 10])
        if header is None:
            own_lines.append(current)
            continue

        sender, date, body_offset = header
        pieces.append(prefix + " On {} {} wrote:\n".format(date, sender))
        remainder = lines[pos + body_offset:]
        if level <= MAX_LEVEL:
            pieces.append(requote(remainder, level + 1))
        else:
            # Too deep: stop looking for further headers and quote the rest
            # verbatim one level down.
            deeper = ">" * (level + 1)
            pieces.extend(deeper + " " + quoted + "\n" for quoted in remainder)
        # Everything after the header has been consumed above.
        break

    pieces.extend(prefix + " " + own + "\n" for own in own_lines)
    return "".join(pieces)
def retrieve_text_from_multipart(message):
    """Find the plain-text body among the sub-parts of a multipart message.

    Only the direct sub-parts of ``message`` are examined. A ``text/plain``
    part that carries no Content-Disposition header is decoded with
    ``get_message_text``; a ``multipart/alternative`` part is searched
    recursively. If several parts qualify, the last one wins.

    Args:
        message: A parsed multipart message whose payload is a list of parts.

    Returns:
        A tuple ``(charset, text)`` for the selected body, or
        ``(None, None)`` when no usable plain-text part exists.
    """
    charset = None
    text = None
    for part in message.get_payload():
        #print(part.get_content_type(), part.items())
        content_type = part.get_content_type()
        if content_type == 'text/plain' and 'Content-Disposition' not in part:
            charset, text = get_message_text(part)
        elif content_type == 'multipart/alternative':
            nested_charset, nested_text = retrieve_text_from_multipart(part)
            # A nested container without plain text must not discard a body
            # that an earlier sibling part already provided.
            if nested_text is not None:
                charset, text = nested_charset, nested_text
    return (charset, text)
def main():
    """Read one e-mail from stdin and write its re-quoted text to stdout.

    The plain-text body is extracted (from the message itself, or from its
    sub-parts when it is multipart), converted with ``requote`` starting at
    quote level 1, and written to stdout encoded as UTF-8. When no plain-text
    body can be found, a single placeholder line is printed instead.
    """
    # Imported here because only this entry point needs the bytes parser.
    from email.parser import BytesParser

    # Parse the raw bytes rather than text-mode stdin: a text stream would
    # decode 8-bit bodies with the locale encoding before the message's own
    # charset is known, and the payload bytes could not be recovered.
    message = BytesParser().parse(sys.stdin.buffer)

    if message.is_multipart():
        _charset, text = retrieve_text_from_multipart(message)
    else:
        _charset, text = get_message_text(message)

    if text is None:
        print("> *** No usable payload ***")
        return

    quoted = requote(text.split('\n'), 1)
    sys.stdout.buffer.write(quoted.encode('utf-8'))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment