Skip to content

Instantly share code, notes, and snippets.

@imran31415
Created December 10, 2015 21:37
Show Gist options
  • Save imran31415/2aaca2b89fa9988909a3 to your computer and use it in GitHub Desktop.
Save imran31415/2aaca2b89fa9988909a3 to your computer and use it in GitHub Desktop.
import sys
import mailbox
import email
import quopri
import json
from BeautifulSoup import BeautifulSoup
def cleanContent(msg):
    """Return the plain text of a quoted-printable, possibly-HTML body.

    The input is first run through ``quopri.decodestring`` and then parsed
    with BeautifulSoup; every text node found in the parsed document is
    concatenated, in document order, into the returned string.
    """
    decoded = quopri.decodestring(msg)
    text_nodes = BeautifulSoup(decoded).findAll(text=True)
    return ''.join(text_nodes)
def _to_text(value):
    """Return *value* as text: UTF-8-decode byte strings, pass others through."""
    if isinstance(value, bytes):
        return value.decode('utf-8', 'ignore')
    return value


def jsonifyMessage(msg):
    """Convert an email message into a JSON-serializable dictionary.

    Every header becomes a top-level key holding its text value (byte
    strings are decoded as UTF-8, ignoring undecodable bytes).  The ``To``,
    ``Cc`` and ``Bcc`` headers, when present and non-empty, have spaces,
    tabs and line breaks removed and are split on commas into lists.  Each
    non-multipart part yielded by ``msg.walk()`` is appended to the
    ``'parts'`` list as ``{'contentType': ..., 'content': ...}``, where the
    content is the raw payload passed through ``cleanContent``.

    Args:
        msg: a message object providing ``items()`` and ``walk()``, such as
            the result of ``email.message_from_file``.

    Returns:
        dict: the headers plus the ``'parts'`` list.  If converting a part
        raises an ``Exception``, the error is reported on stderr and the
        headers and the parts collected so far are still returned.
    """
    json_msg = {'parts': []}
    for name, value in msg.items():
        json_msg[name] = _to_text(value)

    for name in ('To', 'Cc', 'Bcc'):
        recipients = json_msg.get(name)
        if not recipients:
            continue
        # The value is already text at this point; decoding it a second
        # time would force an implicit ASCII encode on Python 2 and fail on
        # non-ASCII addresses, so only the whitespace is stripped here.
        for whitespace in ('\n', '\t', '\r', ' '):
            recipients = recipients.replace(whitespace, '')
        json_msg[name] = recipients.split(',')

    try:
        for part in msg.walk():
            if part.get_content_maintype() == 'multipart':
                continue
            payload = _to_text(part.get_payload(decode=False))
            json_msg['parts'].append({
                'contentType': part.get_content_type(),
                'content': cleanContent(payload),
            })
    except Exception as e:
        # Best effort: keep whatever was converted before the failure.
        sys.stderr.write(
            'Skipping message - error encountered (%s)\n' % (str(e), ))
    # Returning here rather than from a ``finally`` clause lets
    # KeyboardInterrupt / SystemExit propagate instead of being swallowed.
    return json_msg
class Encoder(json.JSONEncoder):
    """JSON encoder that writes otherwise-unsupported iterables as arrays.

    This lets ``json.dump`` accept objects such as generators, which the
    stock encoder rejects.
    """

    def default(self, o):
        """Return the items of *o* as a list.

        Objects that are not iterable are handed to the base class, which
        raises the standard "not JSON serializable" ``TypeError``.
        """
        try:
            iterator = iter(o)
        except TypeError:
            # Not iterable: defer to the base class for its usual error.
            return json.JSONEncoder.default(self, o)
        return list(iterator)
# The generator itself...
def gen_json_msgs(mb):
    """Yield the ``jsonifyMessage`` form of each message read from *mb*.

    Messages are pulled one at a time with ``mb.next()``; iteration stops
    as soon as that call returns ``None``.
    """
    message = mb.next()
    while message is not None:
        yield jsonifyMessage(message)
        message = mb.next()
def convert_to_json(MBOX, OUT_FILE):
    """Convert the mailbox file at *MBOX* to JSON written to *OUT_FILE*.

    The mailbox is read with ``mailbox.UnixMailbox`` using
    ``email.message_from_file`` as the message factory, and the messages
    produced by ``gen_json_msgs`` are dumped with 4-space indentation
    through the ``Encoder`` class.

    Args:
        MBOX: path of the mailbox file to read.
        OUT_FILE: path of the JSON file to create or overwrite.

    NOTE: ``mailbox.UnixMailbox`` exists only on Python 2.
    """
    # Context managers guarantee both files are closed (and the output is
    # flushed) even if parsing or serialization fails part-way through.
    with open(MBOX, 'rb') as mbox_file:
        with open(OUT_FILE, 'wb') as out_file:
            mbox = mailbox.UnixMailbox(mbox_file, email.message_from_file)
            json.dump(gen_json_msgs(mbox), out_file, indent=4, cls=Encoder)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment