Skip to content

Instantly share code, notes, and snippets.

@alswl
Created July 27, 2012 01:22
Show Gist options
  • Save alswl/3185641 to your computer and use it in GitHub Desktop.
Save alswl/3185641 to your computer and use it in GitHub Desktop.
moin2txt
#!/usr/bin/env python2
# coding=utf-8
# Convert MoinMoin wiki pages to plain-text files
# For MoinMoin 1.9.x
# author: alswl
# updated at: 2012-07-22
import sys
import os
import argparse
import binascii
import re
import shutil
# When True, convert() backslash-escapes '(' and ')' in the directory name
# that it prints in its error message.
IS_DECODE_PATH = False
def convert(root, dir, target):
    """Copy the current revision of one page directory to a ``.txt`` file.

    The first line of ``<root>/<dir>/current`` is read and used as the file
    name inside ``<root>/<dir>/revisions``; that revision file is copied to
    ``<target>/<decoded name>.txt``.

    Args:
        root: Directory that contains the page directories.
        dir: Name of one page directory inside ``root`` (possibly encoded).
        target: Existing directory that receives the ``.txt`` file.

    Returns:
        None. Nothing is done when ``name_decode(dir)`` is empty.

    An ``IOError`` raised while reading or copying is not propagated; a
    one-line report is written to stderr instead so that the remaining
    pages can still be processed.
    """
    name = name_decode(dir)
    if not name:
        return
    # Replace '/' so the decoded name stays a single path component.
    dst = os.path.join(target, name.replace('/', '-') + '.txt')
    try:
        # Use a context manager so the handle is closed even on failure.
        with open(os.path.join(root, dir, 'current'), 'r') as current:
            version = current.readline().strip()
        src = os.path.join(root, dir, 'revisions', version)
        shutil.copyfile(src, dst)
    except IOError as e:
        if IS_DECODE_PATH:
            dir = dir.replace('(', r'\(').replace(')', r'\)')
        sys.stderr.write('File %s, Name: %s, Message: %s\n'
                         % (dir, name, str(e)))
# A parenthesised run of hex digit pairs, e.g. "(2f)" or "(e4b8ad)".
_ENCODED_RUN_RE = re.compile(r'\(((?:[0-9a-fA-F]{2})+)\)')


def name_decode(name):
    """Decode a page directory name that contains ``(hex)`` groups.

    Each parenthesised group of hex digit pairs is replaced by the bytes it
    encodes; all other text is kept unchanged, including text that follows
    the last group. A name without any group is returned as is.

    Parenthesised groups that are not an even-length run of hex digits are
    left in place as literal text instead of raising.

    Args:
        name: The directory name to decode.

    Returns:
        The decoded name. On Python 2 the decoded bytes are inserted
        unchanged; on Python 3 they are decoded as UTF-8 (assumed to be the
        wiki's page-name charset -- TODO confirm for non-default setups).
    """
    parts = []
    lastpos = 0
    for match in _ENCODED_RUN_RE.finditer(name):
        # Literal text between the previous group and this one.
        parts.append(name[lastpos:match.start()])
        decoded = binascii.unhexlify(match.group(1))
        if not isinstance(decoded, str):
            # Python 3 returns bytes here; Python 2 already returns str.
            decoded = decoded.decode('utf-8')
        parts.append(decoded)
        lastpos = match.end()
    # Keep whatever follows the last group (or the whole name if none).
    parts.append(name[lastpos:])
    return ''.join(parts)
def walk(path, target):
    """Run convert() on every entry listed directly inside ``path``.

    Args:
        path: Directory whose entries are passed to convert() one by one.
        target: Output directory handed through to convert().
    """
    entries = os.listdir(path)
    for entry in entries:
        convert(path, entry, target)
def main():
    """Parse the command line and convert the pages it points at.

    Two required string options are accepted: ``--input``/``-i`` and
    ``--output``/``-o``. Their values are passed to walk().
    """
    cli = argparse.ArgumentParser(
        description='Convert moin wiki to text archieves'
    )
    # (long flag, short flag, help text) for each required path option.
    path_options = (
        ('--input', '-i', 'the path of moinmoin/data/pages'),
        ('--output', '-o', 'the path os target'),
    )
    for long_flag, short_flag, help_text in path_options:
        cli.add_argument(long_flag, short_flag,
                         help=help_text,
                         type=str,
                         required=True)
    options = cli.parse_args()
    walk(options.input, options.output)
# Run the converter only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment