Skip to content

Instantly share code, notes, and snippets.

@sbp
Created March 19, 2011 15:25
Show Gist options
  • Select an option

  • Save sbp/877534 to your computer and use it in GitHub Desktop.

Select an option

Save sbp/877534 to your computer and use it in GitHub Desktop.
Convert Mail.app messages into HTML
#!/usr/bin/env python
import re, glob, email.parser
def message_body(message):
    """Return the decoded payload of the first text part of *message*.

    A message whose main content type is ``text`` yields its own payload.
    A ``multipart`` message is walked and the payload of the first part
    whose main type is ``text`` is returned.

    Raises:
        ValueError: with the main type as its argument when the message is
            neither ``text`` nor ``multipart``, or with
            ``'No text part found'`` when a multipart message contains no
            text part.
    """
    kind = message.get_content_maintype()
    if kind not in ('text', 'multipart'):
        raise ValueError(kind)
    if kind == 'text':
        return message.get_payload(decode=True)

    # Multipart: take the first text part encountered while walking the tree.
    text_parts = (
        part for part in message.walk()
        if part.get_content_maintype() == 'text'
    )
    first_text = next(text_parts, None)
    if first_text is None:
        raise ValueError('No text part found')
    return first_text.get_payload(decode=True)
# Matches a square-bracketed tag (e.g. "[some-list]") together with the run of
# spaces/tabs that follows it; neat() uses it to delete such tags from subjects.
r_label = re.compile(r'\[[^]]+\][ \t]+')
# Captures, non-greedily, the text between a "<" and the next ">"; used to pull
# the value out of angle-bracketed header fields.
r_email = re.compile(r'<(.*?)>')
def encode(text):
    """Return *text* with ``&`` and ``<`` replaced by HTML entities.

    Only these two characters are escaped; ``>`` and quotes pass through
    unchanged.
    """
    # '&' must be handled first so the '&' of '&lt;' is not escaped again.
    for raw, entity in (('&', '&amp;'), ('<', '&lt;')):
        text = text.replace(raw, entity)
    return text
def neat(subject):
    """Return *subject* cleaned up and HTML-escaped for display.

    Every ``[tag]`` followed by spaces/tabs (as matched by ``r_label``) is
    removed, each non-overlapping ``'Re: Re:'`` is replaced by ``'Re:'`` in a
    single pass, and the result is passed through ``encode``.
    """
    return encode(r_label.sub('', subject).replace('Re: Re:', 'Re:'))
def name_and_email(sender):
    """Split a sender header value into ``(name, address)``.

    The address is the text inside the first ``<...>`` pair.  The name is
    what is left of *sender* after every ``<...>`` group has been removed,
    with surrounding double quotes and spaces stripped.

    A value with no angle brackets at all (for example a bare
    ``user@example.com``) is returned as an empty name plus the stripped
    value as the address, rather than failing with ``AttributeError`` on
    the missing regex match.
    """
    match = r_email.search(sender)
    if match is None:
        # Bare address: there is no display name to extract.
        return '', sender.strip()
    name = r_email.sub('', sender).strip('" ')
    return name, match.group(1)
# Script body: read every *.emlx file in the current directory, order the
# messages by their Date header and write one HTML section per message to
# stdout.
#
# NOTE: this file is Python 2.  Each print below has exactly one parenthesised
# argument, so it produces the same output as the equivalent print statement;
# print('') emits the blank line a bare `print` would.
print('<link rel="stylesheet" href="http://goo.gl/NVkyD">')
print('<link rel="stylesheet" href="style.css">')

import dateutil.parser

counter = 1      # 1-based sequence number, used as the HTML anchor id
messages = {}    # Message-ID -> sequence number of messages already written
unordered = []   # (parsed date, message) pairs in glob order

# One parser is enough; it keeps no per-message state between parsestr calls.
parser = email.parser.Parser()
for name in glob.glob('*.emlx'):
    with open(name) as f:
        # The first line of the file is not part of the message and is skipped
        # (presumably the byte-count line Mail.app writes -- TODO confirm).
        next(f)
        # Keep reading by iteration: Python 2 file objects must not mix
        # next() with read()/readline().
        raw = ''.join(f)
    message = parser.parsestr(raw)
    date = message['date']
    t = dateutil.parser.parse(date)
    unordered.append((t, message))

# Sort on the timestamp only.  Sorting the bare tuples would fall back to
# comparing the Message objects whenever two timestamps are equal.
for t, message in sorted(unordered, key=lambda pair: pair[0]):
    date = message['date']
    sender = message['from']
    person, address = name_and_email(sender)
    subject = message['subject']
    body = message_body(message)

    # Register this message so later replies can link back to it.  A missing
    # or bracket-less Message-ID just means nothing can link to this message.
    messageid = message['message-id']
    mid_match = r_email.search(messageid) if messageid else None
    if mid_match:
        mid = mid_match.group(1)
        messages[mid] = counter

    # Sequence number of the message being replied to, or 0 if it is unknown
    # or has not been written yet.
    replied = 0
    reply = message['in-reply-to']
    if reply:
        reply_match = r_email.search(reply)
        if reply_match:
            replymid = reply_match.group(1)
            replied = messages.get(replymid, 0)

    if replied:
        subj = '<a href="#%s">%s</a>' % (replied, neat(subject))
    else:
        subj = neat(subject)

    print('<h2 id="%s">%s</h2>' % (counter, subj))
    print('')
    print('<p class="about">(<a href="#%s">#%s</a>) ' % (counter, counter))
    print('by <strong>%s</strong>' % encode(person))
    # '@' is replaced by the entity &#8857; (presumably to obscure the address
    # from harvesters -- TODO confirm intent).
    address = encode(address).replace('@', '&#8857;')
    print('&lt;%s><br>' % address)
    print(encode(date))
    print('')
    print('<p class="body">')

    # Keep body lines up to the first one containing '?xml' or '-- Yahoo!'
    # (presumably the trailing plist of an .emlx file and a Yahoo! footer).
    kept = []
    for line in body.splitlines():
        if '?xml' in line or '-- Yahoo!' in line:
            break
        kept.append(line)

    # Drop trailing whitespace/blank lines, then emit one <br>-terminated
    # line per remaining body line.
    body = '\n'.join(kept).rstrip(' \t\r\n') + '\n'
    for line in body.splitlines():
        print(encode(line) + '<br>')
    print('')
    counter += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment