Created
March 9, 2015 02:09
-
-
Save justinian/e0059c55f595c22d4dce to your computer and use it in GitHub Desktop.
Import MoinMoin data to a Hugo site
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import os.path | |
import re | |
import string | |
import time | |
# Directory scanned by the driver loop at the bottom of the script; every
# sub-directory that contains a "revisions" folder is treated as one page.
PAGES = "import/data/pages"

# Matches a parenthesised run of lowercase hex digits inside a page directory
# name, e.g. "(2f)".
char_re = re.compile(r"\([0-9a-f]+\)")


def char_re_replace(match):
    """Decode one hex escape matched by ``char_re`` into the text it encodes.

    The digits between the parentheses are read two at a time, each pair
    giving one byte value (a trailing single digit is read on its own).

    On Python 2 the raw byte string is returned.  On Python 3 the bytes are
    decoded as UTF-8 so that multi-byte sequences such as ``(c3a9)`` become a
    single character; if they are not valid UTF-8 each byte is mapped to the
    code point of the same value instead.
    """
    hex_digits = match.group(0)[1:-1]
    raw = bytearray(
        int(hex_digits[i:i + 2], 16) for i in range(0, len(hex_digits), 2)
    )
    if str is bytes:
        # Python 2: ``str`` is the byte string type, so hand the bytes back.
        return str(raw)
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        # Same result as calling chr() on every byte value individually.
        return raw.decode("latin-1")
# Translation table that turns every ASCII punctuation or whitespace character
# into "-".  ``string.maketrans`` only exists on Python 2; on Python 3 the
# equivalent is the ``str.maketrans`` static method.
_maketrans = getattr(string, "maketrans", None) or str.maketrans
dash_sub = _maketrans(
    string.punctuation + string.whitespace,
    "-" * len(string.punctuation + string.whitespace),
)
# A single dash at the very start or very end of the string.
dash_trim = re.compile("-$|^-")
# Any run of one or more dashes.
dash_collapse = re.compile(r"-+")
# Zero-width position between a lowercase letter and an uppercase letter.
title_split = re.compile(r"(?<=[a-z])(?=[A-Z])")


def fix_name(s):
    """Insert a space at every lowercase-to-uppercase boundary in *s*.

    ``"FrontPage"`` becomes ``"Front Page"``.
    """
    return title_split.sub(r" ", s)


def fix_filepart(s):
    """Turn one path component into a lowercase, dash-separated slug.

    CamelCase words are split first, then the text is lowercased, every
    punctuation/whitespace character becomes a dash, runs of dashes collapse
    to one, and a leading/trailing dash is removed.

    The CamelCase split has to run *before* lowercasing: once the text is
    lowercase there is no uppercase letter left for ``title_split`` to find.
    Splitting first makes ``"FrontPage"`` and ``"Front Page"`` produce the
    same slug (``"front-page"``).
    """
    s = fix_name(s).lower().translate(dash_sub)
    s = dash_collapse.sub("-", s)
    s = dash_trim.sub("", s)
    return s
def fix_filename(s):
    """Normalise a file name, cleaning its stem and extension separately.

    The name is split with ``os.path.splitext``; both halves go through
    ``fix_filepart`` and are re-joined with a single ".".  When the cleaned
    extension is empty (the name had no extension, or only a bare trailing
    dot) no "." is appended, so ``"README"`` does not turn into ``"readme."``.
    """
    root, ext = os.path.splitext(s)
    pieces = [fix_filepart(root)]
    cleaned_ext = fix_filepart(ext)
    if cleaned_ext:
        pieces.append(cleaned_ext)
    return ".".join(pieces)
# "attachment:<filename>" references in page text.  The filename may contain
# ASCII letters, digits, ".", "_" and "-"; without "-" in the class a name
# such as "my-file.png" would be cut off at the hyphen.
attachment_re = re.compile(r"attachment:([A-Za-z._0-9-]+)")
# Any whole line that starts with "#".
comment_re = re.compile(r"(?m)^#.*$")
# A line consisting of N "=" signs, some text without "=", and the same N "="
# signs again, with optional surrounding whitespace.
header_re = re.compile(r"(?m)^\s*(\=+)([^=]+?)\1\s*$")


def replace_header(match):
    """Rewrite a heading matched by ``header_re`` as a "#"-prefixed heading.

    The number of "#" characters equals the number of "=" signs captured in
    group 1.  The heading text (group 2) is emitted exactly as captured,
    including any surrounding spaces, and the result starts with a newline.
    """
    level = len(match.group(1))
    heading_text = match.group(2)
    return "\n{0} {1}".format("#" * level, heading_text)
# Text wrapped in three single quotes on each side (shortest match).
bold_re = re.compile(r"\'\'\'(.*?)\'\'\'")


def replace_bold(match):
    """Wrap the text captured by ``bold_re`` in double asterisks."""
    inner = match.group(1)
    return "**" + inner + "**"
# "{{target}}", "{{target|alt}}" or "{{target|alt|options}}".
image_re = re.compile(r"\{\{([^\|]+)(\|[^|]*)?(\|[^|]*)?\}\}")


def replace_image(match):
    """Render an ``image_re`` match as a ``wrapimage`` shortcode call.

    Group 1 is used as ``src``.  Group 2, when it holds more than the leading
    "|", becomes an ``alt="..."`` attribute.  Group 3, when it holds more than
    the leading "|", is appended with its commas replaced by spaces.  Missing
    parts are rendered as empty strings, so the separating spaces remain.
    """
    target = match.group(1)
    alt_part = match.group(2)
    style_part = match.group(3)

    has_alt = bool(alt_part) and len(alt_part) > 1
    has_style = bool(style_part) and len(style_part) > 1

    alt = 'alt="%s"' % (alt_part[1:],) if has_alt else ""
    style = style_part[1:].replace(',', ' ') if has_style else ""

    return '{{< wrapimage src="%s" %s %s >}}' % (target, alt, style)
# "Category<Word>" not preceded by another word character; group 1 is <Word>.
# Written as a raw string so that "\w" reaches the regex engine instead of
# being treated as an (invalid) string escape.
category_re = re.compile(r"(?<!\w)Category(\w+)")


def find_categories(data):
    """Return the category names referenced in *data*, in order of appearance.

    Each name is the word that follows the "Category" prefix.  Repeated
    references are returned as often as they occur.
    """
    return category_re.findall(data)
def replace_image_link(match):
    """Render an image match as a plain HTML ``<img>`` tag.

    Expects a match with three groups laid out like ``image_re``: group 1 is
    the ``src``; group 2, when longer than its leading "|", becomes an
    ``alt="..."`` attribute; group 3, when longer than its leading "|", is
    appended with commas replaced by spaces.  Missing parts render as empty
    strings.
    """
    source = match.group(1)
    alt_part = match.group(2)
    style_part = match.group(3)

    has_alt = bool(alt_part) and len(alt_part) > 1
    has_style = bool(style_part) and len(style_part) > 1

    alt = 'alt="%s"' % (alt_part[1:],) if has_alt else ""
    style = style_part[1:].replace(',', ' ') if has_style else ""

    return '<img src="%s" %s %s>' % (source, alt, style)
# "[[target]]" or "[[target|title]]".
link_re = re.compile(r"\[\[([^|\]]+)(\|[^\]]+)?\]\]")
# Targets that are absolute URLs ("scheme://...") or "mailto:" addresses.
_external_link_re = re.compile(r"(?i)^(?:[a-z][a-z0-9+.\-]*://|mailto:)")


def replace_link(match):
    """Rewrite a ``link_re`` match as a Markdown ``[title](link)``.

    * The title is the text after "|" when present, otherwise the target.
      A title that starts with "{{" is converted to an ``<img>`` tag via
      ``image_re`` / ``replace_image_link``.
    * Targets beginning with "/files" are used verbatim.
    * External targets (``scheme://...`` or ``mailto:``) are also used
      verbatim; slugifying them and wrapping them in ``relref`` would yield a
      link to a page that does not exist.
    * Every other target is treated as a page path: each "/"-separated part is
      passed through ``fix_filepart`` and the result is wrapped in a
      ``relref`` shortcode pointing at the ".md" file.
    """
    target = match.group(1)

    title = target
    if match.group(2) and len(match.group(2)) > 1:
        title = match.group(2)[1:]
    if title.startswith("{{"):
        title = image_re.sub(replace_image_link, title)

    if target.startswith("/files") or _external_link_re.match(target):
        link = target
    else:
        link = "/".join(map(fix_filepart, target.split('/')))
        link = '{{< relref "%s.md" >}}' % (link,)

    return '[%s](%s)' % (title, link)
def write_file(name, date, oldpath, newpath, attachpath):
    """Convert one wiki revision file into a page with "+++" front matter.

    Parameters:
        name: page title written to the ``title`` field.
        date: time tuple formatted with ``time.asctime`` for the ``date`` field.
        oldpath: path of the revision file to read.
        newpath: path of the output file (overwritten if it exists).
        attachpath: path that ``attachment:`` references are rewritten
            against; a "/" is prepended to it.

    The body is transformed in a fixed order: categories are collected, then
    "#" lines and category markers are removed, bold markup, attachment
    references, headings and links are rewritten, and the "~-IA-~" marker is
    replaced by the ``ia`` shortcode.
    """
    def fix_attach(match):
        # Point the reference at the normalised copy of the attachment.
        return "/" + os.path.join(attachpath, fix_filename(match.group(1)))

    with open(oldpath) as source:
        data = source.read()

    # Categories must be collected before the markers are stripped below.
    categories = find_categories(data)
    data = comment_re.sub("", data)
    data = category_re.sub("", data)
    data = bold_re.sub(replace_bold, data)
    # Attachments are rewritten before links so that a link wrapping an
    # attachment reference already sees the "/files..." form.
    data = attachment_re.sub(fix_attach, data)
    data = header_re.sub(replace_header, data)
    # NOTE: inline image conversion was disabled in the original script:
    #data = image_re.sub(replace_image, data)
    data = link_re.sub(replace_link, data)
    # Replace the space-prefixed form first so the leading space is dropped.
    data = data.replace(" ~-IA-~", "{{< ia >}}")
    data = data.replace("~-IA-~", "{{< ia >}}")

    # Escape backslashes and double quotes so the quoted title stays valid.
    safe_title = name.replace("\\", "\\\\").replace('"', '\\"')
    front_matter = [
        "+++",
        "title = \"%s\"" % (safe_title,),
        "categories = [%s]" % (", ".join('"%s"' % (c,) for c in categories),),
        "date = \"%s\"" % (time.asctime(date).strip(),),
        "+++\n",
    ]

    with open(newpath, "w") as out:
        out.write("\n".join(front_matter) + "\n")
        out.write(data)
def write_page(name, path, revision):
    """Export one revision of a page, together with its attachments.

    Parameters:
        name: full page name; "/" separates parent pages from the leaf.
        path: directory of the page inside the wiki data tree.
        revision: revision number; the file read is
            ``<path>/revisions/<revision as 8 digits>``.

    Returns:
        ``False`` when the revision file does not exist, ``True`` once the
        page has been written.

    The page date is taken from the ``edit-log`` line whose second field
    equals *revision*; its first field is divided by 1,000,000 and passed to
    ``time.localtime``.  If no such line exists (or there is no edit-log) a
    notice is printed and the current time is used.
    """
    filepath = os.path.join(path, "revisions", "%08d" % revision)
    if not os.path.isfile(filepath):
        return False

    date = None
    log_path = os.path.join(path, "edit-log")
    if os.path.isfile(log_path):
        with open(log_path) as log:
            for line in log:
                fields = line.split()
                if len(fields) < 2:
                    # Blank or truncated line: nothing to compare against.
                    continue
                if int(fields[1]) == revision:
                    date = time.localtime(int(fields[0]) // 1000000)
                    break
    if date is None:
        print("no date on %s" % (name,))
        date = time.localtime()

    parts = name.split("/")
    name = fix_name(parts[-1])
    newpath = os.path.join("content", "page", *map(fix_filepart, parts[:-1]))
    newfile = fix_filepart(name) + ".md"

    if not os.path.isdir(newpath):
        os.makedirs(newpath)

    # NOTE(review): only the leaf name is passed on, so attachments of
    # sub-pages are not nested under their parents - confirm this is intended.
    attachpath = write_attachments(name, path)
    write_file(name, date, filepath, os.path.join(newpath, newfile), attachpath)
    return True
def write_attachments(name, path):
    """Copy a page's attachments under ``static/`` and return their web path.

    Parameters:
        name: page name; each "/"-separated part is slugified with
            ``fix_filepart`` to build the target directory.
        path: directory of the page; attachments are read from its
            ``attachments`` sub-directory.

    Returns:
        The path ``files/<slug>/...`` (without the ``static`` prefix).  The
        same value is returned whether or not anything was copied, so callers
        always build links against the same root.
    """
    attachments = os.path.join(path, "attachments")
    webpath = os.path.join("files", *map(fix_filepart, name.split("/")))

    if not os.path.isdir(attachments):
        return webpath
    filenames = os.listdir(attachments)
    if not filenames:
        return webpath

    newpath = os.path.join("static", webpath)
    if not os.path.isdir(newpath):
        os.makedirs(newpath)

    for filename in filenames:
        oldfile = os.path.join(attachments, filename)
        if not os.path.isfile(oldfile):
            # Only regular files can be copied.
            continue
        newfile = os.path.join(newpath, fix_filename(filename))
        # Binary mode keeps images and other non-text attachments intact.
        with open(oldfile, "rb") as src, open(newfile, "wb") as dst:
            dst.write(src.read())

    return webpath
def write_revisions(name, path, current):
    """Export the newest revision of a page that can actually be written.

    Revisions are tried from *current* down to 1; the first one for which
    ``write_page`` returns a true value ends the search.
    """
    revision = current
    while revision > 0:
        if write_page(name, path, revision):
            break
        revision -= 1
# Driver: export every page directory found under PAGES.
for d in os.listdir(PAGES):
    path = os.path.join(PAGES, d)
    if not os.path.isdir(path):
        continue
    revs = os.path.join(path, "revisions")
    if not os.path.isdir(revs):
        continue
    # The "current" file holds the number of the page's latest revision.
    with open(os.path.join(path, "current")) as current_file:
        current = int(current_file.read().strip())
    # Undo the hex escapes in the directory name and turn "_" into spaces.
    name = char_re.sub(char_re_replace, d).replace("_", " ")
    write_revisions(name, path, current)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment