Created
March 19, 2011 15:25
-
-
Save sbp/877534 to your computer and use it in GitHub Desktop.
Convert Mail.app messages into HTML
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import re, glob, email.parser | |
def message_body(message):
    """Return the decoded text payload of an email message.

    Args:
        message: An ``email.message.Message`` instance.

    Returns:
        The result of ``get_payload(decode=True)`` on the chosen part, i.e.
        the payload with its Content-Transfer-Encoding undone.

    Raises:
        ValueError: If the message is neither ``text/*`` nor ``multipart/*``
            (the main type is the exception argument), or if a multipart
            message contains no ``text/*`` part at all.
    """
    maintype = message.get_content_maintype()
    if maintype == 'text':
        return message.get_payload(decode=True)
    if maintype == 'multipart':
        first_text = None
        for part in message.walk():
            if part.get_content_maintype() != 'text':
                continue
            # Prefer text/plain: an HTML alternative listed first would
            # otherwise be returned as raw markup.
            if part.get_content_subtype() == 'plain':
                return part.get_payload(decode=True)
            if first_text is None:
                first_text = part
        if first_text is not None:
            # No text/plain part; fall back to the first text part found.
            return first_text.get_payload(decode=True)
        raise ValueError('No text part found')
    raise ValueError(maintype)
# A square-bracketed label followed by spaces/tabs, e.g. "[listname] ".
# The pattern is not anchored, so it matches such a label anywhere in a string.
r_label = re.compile(r'\[[^]]+\][ \t]+')

# The shortest <...> span; group 1 is the text between the angle brackets.
r_email = re.compile(r'<(.*?)>')
def encode(text):
    """Escape *text* for inclusion in HTML element content.

    Replaces ``&``, ``<`` and ``>`` with their character entities. The
    ampersand is handled first so the entities produced for the angle
    brackets are not escaped a second time.

    Args:
        text: The string to escape.

    Returns:
        The escaped string.
    """
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    return text.replace('>', '&gt;')
def neat(subject):
    """Tidy a Subject header and escape it with ``encode``.

    Removes every bracketed label matched by ``r_label`` and collapses
    repeated ``Re: Re:`` prefixes into a single ``Re:``.

    Args:
        subject: The raw Subject header value, or ``None`` when the header
            is absent.

    Returns:
        The cleaned subject, passed through ``encode``.
    """
    if subject is None:
        # A message without a Subject header yields None; treat as empty.
        subject = ''
    subject = r_label.sub('', subject)
    # One replace() pass only merges non-overlapping pairs, so
    # "Re: Re: Re:" would still contain "Re: Re:". Repeat until stable;
    # every pass shortens the string, so the loop terminates.
    while 'Re: Re:' in subject:
        subject = subject.replace('Re: Re:', 'Re:')
    return encode(subject)
def name_and_email(sender):
    """Split a From header value into a display name and an address.

    Args:
        sender: The raw header value, e.g. ``'"Jane Doe" <jane@example.com>'``,
            or ``None`` when the header is absent.

    Returns:
        A ``(name, address)`` tuple. For the usual ``Name <address>`` form the
        address is the text inside the first ``<...>`` and the name is what
        remains after removing every ``<...>`` span, with surrounding quotes
        and spaces stripped. Either element may be an empty string.
    """
    if not sender:
        return '', ''
    match = r_email.search(sender)
    if match is None:
        # No angle brackets (e.g. a bare "jane@example.com"): the regex path
        # would fail on .group(), so defer to the standard-library parser.
        from email.utils import parseaddr
        name, address = parseaddr(sender)
        return name.strip('" '), address
    other = r_email.sub('', sender)
    return other.strip('" '), match.group(1)
# Script body: read every *.emlx file in the current directory and print the
# messages as one HTML document, oldest first. Each message gets a numeric
# anchor, and a reply links its heading to the message it answers when that
# message has already been printed.
#
# The print calls use the single-argument parenthesised form, which the
# Python 2 print statement accepts as well.

print('<link rel="stylesheet" href="http://goo.gl/NVkyD">')
print('<link rel="stylesheet" href="style.css">')

import dateutil.parser

counter = 1     # anchor number of the message currently being printed
messages = {}   # Message-ID (without angle brackets) -> anchor number
unordered = []  # (parsed Date header, message) pairs in glob order

for name in glob.glob('*.emlx'):
    parser = email.parser.Parser()
    with open(name) as f:
        # The first line is not part of the message and is skipped
        # (presumably the .emlx length prefix -- TODO confirm).
        next(f)
        # Keep iterating rather than calling f.read(): Python 2 file objects
        # do not allow mixing next() with read().
        raw = ''.join(f)
    message = parser.parsestr(raw)

    date = message['date']
    t = dateutil.parser.parse(date)
    unordered.append((t, message))

# Sort on the timestamp only. Sorting the bare tuples would fall back to
# comparing the message objects whenever two timestamps are equal.
for _, message in sorted(unordered, key=lambda pair: pair[0]):
    date = message['date']
    sender = message['from']
    person, address = name_and_email(sender)
    subject = message['subject']
    body = message_body(message)

    # Remember this message's anchor so later replies can link back to it.
    # A missing or bracket-less Message-ID is simply not recorded.
    id_match = r_email.search(message['message-id'] or '')
    if id_match:
        messages[id_match.group(1)] = counter

    # Anchor number of the message being replied to; 0 means none known.
    replied = 0
    reply = message['in-reply-to']
    if reply:
        reply_match = r_email.search(reply)
        if reply_match:
            replied = messages.get(reply_match.group(1), 0)

    if replied:
        subj = '<a href="#%s">%s</a>' % (replied, neat(subject))
    else:
        subj = neat(subject)

    print('<h2 id="%s">%s</h2>' % (counter, subj))
    print('<p class="about">(<a href="#%s">#%s</a>) ' % (counter, counter))
    print('by <strong>%s</strong>' % encode(person))
    # Replace "@" with a circled-dot entity so the address is not printed
    # verbatim, and show it between literal angle brackets.
    address = encode(address).replace('@', '&#8857;')
    print('&lt;%s&gt;<br>' % address)
    print(encode(date))

    print('<p class="body">')
    # Keep body lines up to the first one containing an XML declaration or a
    # Yahoo footer marker (presumably the trailing .emlx metadata and the
    # list footer -- TODO confirm).
    parsed = []
    for line in body.splitlines():
        if '?xml' in line:
            break
        if '-- Yahoo!' in line:
            break
        parsed.append(line)
    body = '\n'.join(parsed)
    body = body.rstrip(' \t\r\n') + '\n'
    for line in body.splitlines():
        print(encode(line) + '<br>')
    counter += 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment