Created
December 10, 2015 21:45
-
-
Save imran31415/c698c4118f91e467bc78 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mailbox | |
import email.utils | |
from mboxparse import convert_to_json | |
# Path of the mbox file this walkthrough script inspects.
filename = "metadatatest2525.mbox"

# Open the mailbox; indexing or iterating it yields one message object per
# e-mail, each exposing the headers listed in the comment further down.
# create=False: with the default (create=True) a missing or mistyped path
# silently creates an empty file and the lookup below then fails with an
# unrelated KeyError; this way a missing file is reported immediately.
mbox = mailbox.mbox(filename, create=False)

# Get the first message in the mailbox to try the functions below out with.
message = mbox[0]

# Each message has the following attributes:
#     print(message.keys())
# results in the following:
# ['X-GM-THRID', 'X-Gmail-Labels', 'MIME-Version', 'Received', 'Bcc', 'Date', 'Delivered-To', 'Message-ID', 'Subject', 'From', 'To', 'Cc', 'Content-Type']

# Example of looping with Python: go through the messages and print the
# subject of each one.
#     for message in mbox:
#         print(message['subject'])
#this function parses the body of the message (not sure if you need this) and prints/returns it | |
def print_message_body(msg, preview_len=200):
    """Print a short preview of each part of *msg* and return its payload.

    A multipart message is walked recursively: before every sub-part a
    divider line is printed (an empty line before the first sub-part, a row
    of dashes before each later one). For a non-multipart message the
    content type is printed, followed by the first *preview_len* characters
    of the payload.

    Args:
        msg: Message object providing ``get_payload()``, ``is_multipart()``
            and ``get_content_type()``, e.g. an item of a ``mailbox.mbox``.
        preview_len: Maximum number of payload characters printed per
            part. Defaults to 200, the limit that was previously hard-coded.

    Returns:
        The value of ``msg.get_payload()`` for the top-level *msg*: the list
        of sub-messages when it is multipart, otherwise the payload itself
        (in full, not truncated).
    """
    payload = msg.get_payload()
    if msg.is_multipart():
        div = ''
        for sub_msg in payload:
            print(div)
            print_message_body(sub_msg, preview_len)
            div = '------------------------------'
    else:
        print(msg.get_content_type())
        # Only the first preview_len characters are printed. A message with
        # no payload at all (get_payload() gave None) prints an empty line
        # instead of failing on the slice.
        print((payload or '')[:preview_len])
    return payload
#This function prints the meta data of each message | |
def print_message_metadata(message):
    """Print every header of *message* as a name line and a value line.

    For each ``(key, value)`` pair from ``message.items()`` this prints a
    blank separator, then ``"<key>: "``, then the value on its own line
    indented with a tab.

    Args:
        message: Object whose ``items()`` yields ``(header name, value)``
            pairs, e.g. an item of a ``mailbox.mbox``.

    Returns:
        None; the function only writes to standard output.
    """
    for key, value in message.items():
        # print() appends its own newline, so this emits two blank lines.
        print('\n')
        print('{}: '.format(key))
        # NOTE(review): the ': ' after the value looks copied from the key
        # line above; kept as-is so the printed output is unchanged.
        print('\t{}: '.format(value))
#Print the metadata of the message | |
#print_message_metadata(message) | |
#Print the message body | |
#print_message_body(message) | |
# CONVERT MBOX FILE TO JSON FILE
# -------------
# Uses convert_to_json, imported from the mboxparse script at the top of
# this file.
convert_to_json(filename, 'output_test_1.json')

# READ in JSON file
import json

# Plain 'r' instead of 'rU': the 'U' flag is deprecated on Python 3 and
# rejected from Python 3.11 on, and the newline style is irrelevant to the
# JSON parser, so the loaded data is the same.
with open('output_test_1.json', 'r') as f:
    data = json.load(f)

# Print the first message.
print(data[0])

# The print above produces the following (sample output, kept as a comment
# because it is documentation and not code meant to run):
# {u'X-Gmail-Labels': u'Sent,Important', u'Delivered-To': u'[email protected]', u'From': u'John McCarthy <[email protected]>', u'MIME-Version': u'1.0', u'Received': u'by 10.114.78.7 with HTTP; Thu, 10 Dec 2015 08:00:08 -0800 (PST)', u'Cc': [u'JohnMcCarthy<[email protected]>'], u'X-GM-THRID': u'1520186905955984162', u'Bcc': [u'MikeMaelzer<[email protected]>'], u'To': [u'HeatherMcCarthy<[email protected]>', u'JohnMcCarthy<[email protected]>', u'SandraMcCarthy<[email protected]>', u'NeilJaffe<[email protected]>', u'MaryJaffe<[email protected]>'], u'parts': [{u'content': u'can someone "reply all" to this please (except the mysterious BCC person,\r\nwho can\'t)\r\n', u'contentType': u'text/plain'}, {u'content': u'can someone "reply all" to this please (except the mysterious BCC person, who can't)\xa0\n', u'contentType': u'text/html'}], u'Date': u'Thu, 10 Dec 2015 11:00:08 -0500', u'Message-ID': u'<CAFVywbLvJ-tJ34vB7u+XUPYpOqnj6TiC6O+sJohY35GeGVBNeQ@mail.gmail.com>', u'Content-Type': u'multipart/alternative; boundary=001a11c37a8a0ab77e05268d4da6', u'Subject': u'1-to-5 outbound (1 CC) (1 BCC)'}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment