Skip to content

Instantly share code, notes, and snippets.

@maciejmalycha
Created August 17, 2017 08:05
Show Gist options
  • Save maciejmalycha/36ed044dcfe199dedadb8a2cd2885579 to your computer and use it in GitHub Desktop.
Save maciejmalycha/36ed044dcfe199dedadb8a2cd2885579 to your computer and use it in GitHub Desktop.
Translate Mediawiki backup to Confluence source editor format
import re
class Wiki2Confluence(object):
    """Translate MediaWiki markup into Confluence source-editor markup.

    The input file is read into ``self.lines`` (a list of strings) and
    every conversion step rewrites that list in place.  Call ``convert()``
    to run the whole pipeline and ``return_string()`` to fetch the result.

    Lines that start with a space are treated as code: they are tagged
    with a temporary marker so the text-oriented steps skip them, and are
    finally wrapped in a Confluence "code" structured macro.
    """

    # Temporary prefix that tags code lines while the other steps run.
    _CODE_MARKER = "--CODE--"

    # Opening / closing markup of the Confluence code macro.
    _CODE_OPEN = (
        '<ac:structured-macro ac:name="code" ac:schema-version="1">\n'
        '<ac:plain-text-body><![CDATA['
    )
    _CODE_CLOSE = "]]></ac:plain-text-body>\n</ac:structured-macro>"

    # URL matcher, unchanged from the original pattern but written as a raw
    # string.  NOTE: "$-_" inside the character class is an ASCII *range*
    # (from "$" to "_"), so it also accepts characters such as / : ; = ? [ ].
    _URL_RE = re.compile(
        r"(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]"
        r"|(?:%[0-9a-fA-F][0-9a-fA-F]))+)"
    )

    # An ampersand that does not already start a character/entity reference
    # (e.g. "&amp;", "&#38;", "&#x26;"); only those need escaping.
    _BARE_AMP_RE = re.compile(
        r"&(?!(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);)"
    )

    def __init__(self, filename):
        """Read all lines of *filename* into ``self.lines``."""
        super(Wiki2Confluence, self).__init__()
        # Context manager so the file handle is closed right after reading.
        with open(filename) as source:
            self.lines = source.readlines()

    def convert(self):
        """Run every conversion step, in order, on ``self.lines``."""
        self.remove_end_of_line_character()
        self.remove_double_blank_lines()
        # Code lines must be marked first so the text steps can skip them.
        self.add_code_marker()
        self.replace_urls()
        self.replace_amp()
        self.change_headers()
        self.change_bullet_list()
        # The list step embeds "\n" inside entries; this join/split pass
        # turns them back into one entry per line before code detection.
        self.remove_double_blank_lines()
        self.add_code_tag()
        self.remove_code_marker()
        self.remove_double_blank_lines()

    def add_code_marker(self):
        """Prefix every line that starts with a space with the code marker."""
        for i, line in enumerate(self.lines):
            if line.startswith(" "):
                self.lines[i] = self._CODE_MARKER + line

    def remove_code_marker(self):
        """Delete every occurrence of the code marker and re-split lines."""
        text = self.return_string().replace(self._CODE_MARKER, "")
        self.lines = text.split("\n")

    def print_list(self):
        """Debug helper: print each entry with its index.

        The entry is wrapped in a list so its repr (showing hidden
        characters such as embedded newlines) is printed.
        """
        for i, line in enumerate(self.lines):
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%s\t%s" % (i, [line]))

    def remove_end_of_line_character(self):
        """Strip trailing carriage returns / newlines from every line."""
        self.lines = [x.rstrip("\r\n") for x in self.lines]

    def remove_double_blank_lines(self):
        """Collapse runs of blank lines into a single blank line.

        As a side effect the text is joined and split again, so entries
        containing embedded newlines become separate entries.
        """
        text = re.sub("\n\n+", "\n\n", self.return_string())
        self.lines = text.split("\n")

    def return_string(self):
        """Return the current lines joined with newlines."""
        return "\n".join(self.lines)

    def change_headers(self):
        """Turn ``= Title =`` .. ``====== Title ======`` into ``<h1>``..``<h6>``.

        A header is a line starting with 1-6 "=" characters followed by a
        space; surrounding "=" and spaces are stripped from the title.
        """
        for i, line in enumerate(self.lines):
            level = len(line) - len(line.lstrip("="))
            if 1 <= level <= 6 and line[level:level + 1] == " ":
                title = line.strip("= ")
                self.lines[i] = "<h%s>%s</h%s>" % (level, title, level)

    def change_bullet_list(self):
        """Convert ``*``-prefixed bullet lines into ``<ul>``/``<li>`` markup.

        A bullet is 1-6 asterisks followed by a space; the asterisk count
        is the nesting level.  Opening/closing tags are emitted for every
        level that is entered or left, including jumps of more than one
        level, and a list still open at the end of the input is closed.
        Lines starting with "*" that are not recognised as bullets are
        left untouched and do not end the current list.
        """
        depth = 0  # number of currently open <ul> elements
        for i, line in enumerate(self.lines):
            if not line.startswith("*"):
                # Any non-bullet line terminates the whole list.
                self.lines[i] = "</ul>\n" * depth + line
                depth = 0
                continue

            level = len(line) - len(line.lstrip("*"))
            if level > 6 or line[level:level + 1] != " ":
                continue  # starts with "*" but is not a recognised bullet

            # Drop only the bullet prefix so asterisks in the text survive.
            item = "<li>%s</li>" % line[level:].strip(" ")
            if level > depth:
                prefix = "<ul>\n" * (level - depth)
            else:
                prefix = "</ul>\n" * (depth - level)
            self.lines[i] = prefix + item
            depth = level

        if depth:
            # The input ended inside a list: close what is still open.
            self.lines.append("\n".join(["</ul>"] * depth))

    def add_code_tag(self):
        """Wrap each run of marked code lines in a Confluence code macro.

        A run that reaches the end of the input is closed as well.
        """
        result = []
        in_code_block = False
        for line in self.lines:
            is_code = line.startswith(self._CODE_MARKER)
            if is_code and not in_code_block:
                result.append(self._CODE_OPEN)
            elif in_code_block and not is_code:
                result.append(self._CODE_CLOSE)
            in_code_block = is_code
            result.append(line)
        if in_code_block:
            result.append(self._CODE_CLOSE)
        self.lines = result

    def replace_amp(self):
        """Escape bare ``&`` as ``&amp;`` on non-code lines.

        Ampersands that already start an entity or character reference
        are left alone so they are not escaped twice.  Code lines are
        skipped because they end up inside a CDATA section.
        """
        for i, line in enumerate(self.lines):
            if not line.startswith(self._CODE_MARKER):
                self.lines[i] = self._BARE_AMP_RE.sub("&amp;", line)

    def replace_urls(self):
        """Wrap http(s) URLs on non-code lines in ``<a href=...>`` links."""
        for i, line in enumerate(self.lines):
            if not line.startswith(self._CODE_MARKER):
                self.lines[i] = self._URL_RE.sub(
                    r'<a href="\g<1>">\g<1></a>', line
                )
if __name__ == "__main__":
    # Script entry point: convert one MediaWiki dump and print the result.
    # The guard keeps the conversion from running when the file is imported.
    import sys

    # The input defaults to "wiki.txt"; an optional first command-line
    # argument selects a different file (e.g. "lista.txt").
    w2c = Wiki2Confluence(sys.argv[1] if len(sys.argv) > 1 else "wiki.txt")
    w2c.convert()
    # One blank line before and after the converted markup.  print("...")
    # with a single argument behaves the same on Python 2 and Python 3.
    print("")
    print(w2c.return_string())
    print("")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment