Skip to content

Instantly share code, notes, and snippets.

@renesugar
Forked from kadin2048/emlToMbox.py
Created April 7, 2020 23:53
Show Gist options
  • Save renesugar/1d605a76a22ab316a8f6e32b6a6f7477 to your computer and use it in GitHub Desktop.
Save renesugar/1d605a76a22ab316a8f6e32b6a6f7477 to your computer and use it in GitHub Desktop.
Combine a directory of .eml files into a single Unix "mbox" file.
#!/usr/bin/env python
""" Converts a directory full of .eml files to a single Unix "mbox" file.
Accepts as input either an individual .eml file or a directory containing one
or more .eml files.
The output mbox will be created if it doesn't already exist. If it exists,
it will be appended to. There is no checking for duplicates, so use caution.
If duplicate filtering is desired, it could be added to addFileToMbox().
Inspired by http://www.cosmicsoft.net/emlxconvert.html
Usage:
$ ./emlToMbox.py inputdir/ output.mbox
$ ./emlToMbox.py input.eml output.mbox
Requires Python 2.5 or later
STATUS: Tested and appears to work.
"""
import os
import sys
import mailbox
# Module-wide verbosity switch: when true, main() prints progress messages
# to stdout. (A `global` statement is only meaningful inside a function body,
# so none is needed for this module-level assignment.)
debug = True
def _is_eml_name( name ):
    """Return True if *name* ends with the .eml extension (case-insensitive)."""
    # endswith() on the lowered name accepts "MSG.EML" and rejects a file
    # that is literally called "eml" (no dot), which a split('.') test would
    # wrongly accept.
    return name.lower().endswith(".eml")


def _append_eml_path( path, dest_mbox ):
    """Open the .eml file at *path* and append its contents to *dest_mbox*.

    The input file is always closed, even when adding the message fails.
    Errors opening the file are reported on stderr and re-raised.
    """
    try:
        # Binary mode: the mailbox module expects file objects opened in
        # binary mode and does its own newline handling.
        fi = open(path, 'rb')
    except (IOError, OSError):
        sys.stderr.write("Error while opening " + path + "\n")
        raise
    try:
        addFileToMbox( fi, dest_mbox )
    finally:
        fi.close()


def main( arguments ):
    """Convert one .eml file, or a directory of .eml files, into an mbox.

    Parameters:
        arguments: argv-style sequence. arguments[1] is the input path
            (either a directory or a single .eml file) and arguments[2] is
            the destination mbox path, which is created if it does not
            exist and appended to otherwise.

    Returns:
        0 on success; 1 if the input is neither a directory nor a file
        with an .eml extension.

    Raises:
        IOError / OSError: if an input file cannot be opened.
        Any exception raised while adding a message to the mbox propagates
        to the caller after the mbox has been closed.
    """
    infile_name = arguments[1]
    dest_name = arguments[2]

    if debug:
        print("Input is: " + infile_name)
        print("Output is: " + dest_name)

    dest_mbox = mailbox.mbox(dest_name, create=True)  # create dest if missing
    try:
        dest_mbox.lock()  # hold the mbox lock for the whole conversion

        if os.path.isdir(infile_name):
            if debug:
                print("Detected directory as input, using directory mode")
            count = 0
            # Sorted so the message order in the mbox is deterministic;
            # os.listdir() returns entries in arbitrary order.
            for filename in sorted(os.listdir(infile_name)):
                if not _is_eml_name(filename):
                    continue
                _append_eml_path(os.path.join(infile_name, filename), dest_mbox)
                count += 1
            if debug:
                print("Processed " + str(count) + " total files.")
        elif _is_eml_name(infile_name):
            # elif, not if: a directory whose name ends in ".eml" must not
            # also be treated as a single message file.
            if debug:
                print("Detected .eml file as input, using single file mode")
            _append_eml_path(infile_name, dest_mbox)
        else:
            # Report unusable input instead of silently doing nothing.
            sys.stderr.write("Input is neither a directory nor an .eml file: "
                             + infile_name + "\n")
            return 1
    finally:
        # close() flushes pending changes and releases the lock; doing it in
        # a finally clause covers every exit path, including errors.
        dest_mbox.close()

    return 0
def addFileToMbox( fi, dest_mbox ):
    """Append the message read from *fi* to the mailbox *dest_mbox*.

    Parameters:
        fi: the message to add, passed straight to dest_mbox.add()
            (the callers in this file pass an open .eml file object).
        dest_mbox: the destination mailbox object.

    If adding the message fails for any reason, the mailbox is closed and
    the original exception continues to propagate to the caller.
    """
    # Any additional preprocessing logic goes here, e.g. duplicate filter
    added = False
    try:
        dest_mbox.add( fi )
        added = True
    finally:
        # Only the failure path closes the mailbox; on success it stays
        # open so the caller can keep appending messages.
        if not added:
            dest_mbox.close()
if __name__ == "__main__":
    # Exactly two positional arguments are required: the input path
    # (a directory or a single .eml file) and the output mbox path.
    if len(sys.argv) != 3:
        sys.stderr.write("Usage: ./emlToMbox.py input output.mbox\n")
        sys.exit(1)
    # main() returns the process exit status.
    sys.exit( main( sys.argv ) )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment