Created
February 3, 2012 06:57
-
-
Save corydolphin/1728592 to your computer and use it in GitHub Desktop.
Convert Mailman archive to text and mbox formatted archives.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
mailmanToMBox.py: Inserts line feeds to create mbox format from Mailman Gzip'd | |
Text archives | |
Usage: ./to-mbox.py dir | |
Where dir is a directory containing .txt.gz files pulled from mailman Gzip'd Text | |
""" | |
import sys | |
import os | |
def makeMBox(fIn,fOut):
    '''
    Convert one Mailman text archive into an mbox file.

    Adapted from
    http://lists2.ssc.com/pipermail/linux-list/2006-February/026220.html

    Every line that starts with "From " is treated as a message separator:
    the obfuscated address on it ("user at host") is rewritten to
    "user@host", and a blank line is written before each separator except
    the first one. All other lines are copied through unchanged.

    fIn may be either plain text or gzip-compressed; compression is detected
    from the gzip magic number rather than from the file name. The data is
    handled as bytes, so messages in any character encoding pass through
    without decoding errors and line endings are preserved as-is.

    Returns False (and writes nothing) when fIn does not exist or fOut
    already exists; returns True after writing fOut.
    '''
    # Imported here so this function does not depend on the module header.
    import gzip

    if not os.path.exists(fIn) or os.path.exists(fOut):
        return False

    # Gzip streams always begin with the two bytes 0x1f 0x8b.
    with open(fIn, "rb") as probe:
        isGzip = probe.read(2) == b"\x1f\x8b"
    opener = gzip.open if isGzip else open

    seenSeparator = False
    with opener(fIn, "rb") as src, open(fOut, "wb") as out:
        for line in src:
            if line.startswith(b"From "):
                # mbox readers expect a blank line between messages.
                if seenSeparator:
                    out.write(b"\n")
                seenSeparator = True
                line = line.replace(b" at ", b"@")
            out.write(line)
    return True
if __name__ == '__main__':
    # Script entry point: convert every *.txt.gz archive found under the
    # directory given as the single command-line argument.
    if len(sys.argv) != 2:
        print(__doc__)
        sys.exit(1)  # non-zero status so callers can detect the usage error

    rootDir = sys.argv[1]
    suffix = '.txt.gz'
    numConv = 0
    for root, dirs, files in os.walk(rootDir):
        for fil in files:
            if not fil.endswith(suffix):
                continue
            # Join with the directory currently being walked (not rootDir),
            # otherwise archives in subdirectories resolve to wrong paths.
            inFile = os.path.join(root, fil)
            # Swap only the trailing suffix; a '.txt.gz' elsewhere in the
            # path must be left alone.
            outFile = inFile[:-len(suffix)] + '.mbox'
            if makeMBox(inFile, outFile):
                numConv += 1
            else:
                print('%s already exists, did not overwrite' % outFile)
    print('Converted %d archives to mbox format' % numConv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I converted it to Python 3. Also stopped decompressing the gz in Python.