Skip to content

Instantly share code, notes, and snippets.

@benui-dev
Created February 1, 2012 01:57
Show Gist options
  • Select an option

  • Save benui-dev/1714501 to your computer and use it in GitHub Desktop.

Select an option

Save benui-dev/1714501 to your computer and use it in GitHub Desktop.
Parse wiki markup with mwlib
from mwlib.refine import compat,core
from mwlib.parser import nodes
import pprint
def main():
    """Parse a sample wiki-markup string with mwlib and dump it to ``test.txt``.

    The sample text is parsed with ``compat.parse_txt``. Every item yielded by
    iterating over the parse result is passed to ``recursive_parse`` at depth
    0, which traces node class names to stdout and writes extracted text to
    the output file.
    """
    # The markup lines must start at column 0 inside the literal: the text is
    # handed to the parser verbatim.
    raw = """
=Cake=
Modern cake, especially layer cakes, normally contain a
combination of [[flour]], [[sugar]], [[egg (food)|eggs]], and [[butter]].
==Varieties==
This is a subheading.
===Chocolate===
Oh and another nested subheading
=Related Cakes=
* Jaffa cakes!?
* This related section is not a subsection
Cool
"""

    # Alternative (per the original author's note): core.parse_txt(raw)
    # returns raw 'token' classes that aren't much use.

    ### This is the magic
    # Returns nicer classes, Paragraph, PreFormatted etc.
    parsed = compat.parse_txt(raw)

    ### Everything below here is just as an example
    # The context manager guarantees the file is flushed and closed, even if
    # the traversal raises. (The previous `f.close` without parentheses only
    # referenced the method and never called it.)
    with open('test.txt', 'w') as f:
        for section in parsed:
            recursive_parse(f, section, 0)
def recursive_parse(f, node, indent):
    """Walk a parsed node tree, tracing it to stdout and dumping text to *f*.

    For every visited node the class name is printed, indented by ``indent``
    spaces. Section titles and text content are written to *f* as UTF-8
    encoded strings; tables are skipped together with their children.

    Args:
        f: Writable file-like object that receives the extracted text.
        node: Node to process. It must expose ``children``; section nodes
            additionally need ``level`` and text nodes need ``asText()``.
        indent: Current nesting depth (number of leading spaces in the trace).
    """
    tabs = indent * " "
    indent += 1
    # A parenthesised single-argument print behaves like the print statement
    # on Python 2 and like the print function on Python 3.
    print(tabs + node.__class__.__name__)

    if isinstance(node, nodes.Section):
        level = u"=" * node.level
        title = level + node.children[0].asText()
        title = title.rstrip()
        # Encode explicitly, matching the Text branch below: writing a unicode
        # title straight to a byte-oriented file relies on the implicit ASCII
        # codec and fails for non-ASCII titles.
        f.write(title.encode('UTF-8'))
        # Skip the text child, move onto the rest
        for child in node.children[1:]:
            recursive_parse(f, child, indent)
        return
    elif isinstance(node, nodes.Table):
        # Don't process children
        print(tabs + "Table: Skipping")
        return
    elif isinstance(node, nodes.Text):
        txt = node.asText()
        # A text node consisting of a lone newline is not written.
        if txt != "\n":
            txt = txt.rstrip('\n')
            f.write(txt.encode('UTF-8'))
    else:
        # NOTE(review): the file's indentation was lost; this `else` is
        # assumed to close the node-type chain (marking node types that have
        # no dedicated handling) rather than the newline check — confirm.
        f.write("???")

    # Text nodes and unhandled node types fall through to here and have their
    # children (if any) processed recursively.
    if node.children is not None:
        for child in node.children:
            recursive_parse(f, child, indent)
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment