Skip to content

Instantly share code, notes, and snippets.

@imran31415
Created December 10, 2015 21:45
Show Gist options
  • Save imran31415/c698c4118f91e467bc78 to your computer and use it in GitHub Desktop.
Save imran31415/c698c4118f91e467bc78 to your computer and use it in GitHub Desktop.
import mailbox
import email.utils
from mboxparse import convert_to_json
# Path of the mbox file to inspect.
filename = "metadatatest2525.mbox"
# Open the mbox file. The resulting mailbox is indexed by message key below;
# each message exposes the headers listed further down.
mbox = mailbox.mbox(filename)
# Get the first message in the mailbox to try the functions below on.
message = mbox[0]
# Each message has a set of headers, which can be listed with:
#print message.keys()
# For this test mailbox that results in the following:
#['X-GM-THRID', 'X-Gmail-Labels', 'MIME-Version', 'Received', 'Bcc', 'Date', 'Delivered-To', 'Message-ID', 'Subject', 'From', 'To', 'Cc', 'Content-Type']
# Example: loop through the messages and print the subject of each message.
# NOTE: the triple-quoted text below is a bare string literal, so the loop
# inside it is never executed; it is kept only as an illustration.
'''for message in mbox:
print message['subject']
'''
#this function parses the body of the message (not sure if you need this) and prints/returns it
def print_message_body(msg):
    """Print a short preview of the body of ``msg`` and return its payload.

    For a multipart message every sub-part is handled recursively. An empty
    line is printed before the first sub-part and a divider line before each
    following one. For a non-multipart message the content type is printed,
    followed by the first 200 items of the payload (characters, when the
    payload is a string).

    Args:
        msg: A message object providing ``get_payload()``, ``is_multipart()``
            and ``get_content_type()`` (for example an
            ``email.message.Message``).

    Returns:
        The value of ``msg.get_payload()``: the sequence of sub-parts for a
        multipart message, otherwise the payload itself (not truncated).
    """
    payload = msg.get_payload()
    if msg.is_multipart():
        div = ''
        for sub_msg in payload:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(div)
            print_message_body(sub_msg)
            div = '------------------------------'
    else:
        print(msg.get_content_type())
        # Only a 200-item preview is printed; the full payload is returned.
        print(payload[:200])
    return payload
#This function prints the meta data of each message
def print_message_metadata(message):
    """Print every header of ``message``.

    For each (key, value) pair yielded by ``message.items()`` this prints a
    blank gap, then the header name followed by ``": "``, then the value on
    its own tab-indented line (also followed by ``": "``).

    Args:
        message: An object whose ``items()`` method yields (key, value)
            pairs, such as an ``email.message.Message``.

    Returns:
        None. The function only writes to standard output.
    """
    for key, value in message.items():
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\n')
        print('{}: '.format(key))
        print('\t{}: '.format(value))
#Print the metadata of the message
#print_message_metadata(message)
#Print the message body
#print_message_body(message)
# CONVERT MBOX FILE TO JSON FILE
# -------------
# Uses convert_to_json, imported from the mboxparse script at the top of
# this file, to write the mailbox out as 'output_test_1.json'.
convert_to_json(filename, 'output_test_1.json')

# READ in JSON file
import json

# Plain 'r' is used instead of 'rU': the 'U' flag is rejected by open() on
# Python 3.11+, and newline translation makes no difference to JSON parsing.
with open('output_test_1.json', 'r') as f:
    data = json.load(f)

# Print the first message.
print(data[0])

# Sample output of the print above (this is output, not code, so it is kept
# as a comment):
# {u'X-Gmail-Labels': u'Sent,Important', u'Delivered-To': u'[email protected]', u'From': u'John McCarthy <[email protected]>', u'MIME-Version': u'1.0', u'Received': u'by 10.114.78.7 with HTTP; Thu, 10 Dec 2015 08:00:08 -0800 (PST)', u'Cc': [u'JohnMcCarthy<[email protected]>'], u'X-GM-THRID': u'1520186905955984162', u'Bcc': [u'MikeMaelzer<[email protected]>'], u'To': [u'HeatherMcCarthy<[email protected]>', u'JohnMcCarthy<[email protected]>', u'SandraMcCarthy<[email protected]>', u'NeilJaffe<[email protected]>', u'MaryJaffe<[email protected]>'], u'parts': [{u'content': u'can someone "reply all" to this please (except the mysterious BCC person,\r\nwho can\'t)\r\n', u'contentType': u'text/plain'}, {u'content': u'can someone &quot;reply all&quot; to this please (except the mysterious BCC person, who can&#39;t)\xa0\n', u'contentType': u'text/html'}], u'Date': u'Thu, 10 Dec 2015 11:00:08 -0500', u'Message-ID': u'<CAFVywbLvJ-tJ34vB7u+XUPYpOqnj6TiC6O+sJohY35GeGVBNeQ@mail.gmail.com>', u'Content-Type': u'multipart/alternative; boundary=001a11c37a8a0ab77e05268d4da6', u'Subject': u'1-to-5 outbound (1 CC) (1 BCC)'}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment