Skip to content

Instantly share code, notes, and snippets.

@prologic
Last active August 29, 2015 14:16
Show Gist options
  • Save prologic/a45961eef344d9fd56a4 to your computer and use it in GitHub Desktop.
Save prologic/a45961eef344d9fd56a4 to your computer and use it in GitHub Desktop.
created by github.com/tr3buchet/gister
#!/usr/bin/env python
from io import BytesIO
from xml.sax.saxutils import escape

from lxml import etree, objectify
# Sample document for the demo at the bottom of the script: one paragraph in
# the "cci" namespace mixing plain, bold, and bold+underlined runs of text.
# Adjacent literals are concatenated by the compiler, so the value is one
# single-line string with no added whitespace.
xml = (
    '<root xmlns:cci="http://www.w3.org/TR/html4/">'
    '<cci:p>Body text '
    '<cci:bold class="character" displayname="bold" name="bold">'
    'THISTEXTISBOLD </cci:bold>'
    'this text isnt'
    '<cci:bold class="character" displayname="bold" name="bold">'
    '<cci:underline class="character" displayname="underline" name="underline">'
    'THISTEXTISBOLDANDUNDERLINED</cci:underline></cci:bold>'
    ' Blah blah Blah blah Blah blah Blah blah Blah blah Blah blah Blah blah'
    '</cci:p></root>'
)
def parse_xml(xml):
    """Parse an XML document and return its root element.

    :param xml: the document, either as bytes or as text.
    :returns: the root element of the parsed tree.

    Bytes are fed to the parser through a ``BytesIO`` exactly as before.
    Text (``unicode`` on Python 2, ``str`` on Python 3) cannot be wrapped in
    ``BytesIO``, so it is handed to ``etree.fromstring`` instead.
    NOTE(review): lxml is expected to reject text input that carries its own
    encoding declaration -- pass bytes for such documents.
    """
    parser = etree.XMLParser()
    if isinstance(xml, type(u"")):
        # Text input: BytesIO() would raise TypeError here.
        return etree.fromstring(xml, parser)
    return etree.parse(BytesIO(xml), parser).getroot()
def remove_namespaces(root):
    """Strip the namespace part from every element tag under ``root``.

    The tree is modified in place and nothing is returned. A tag of the form
    ``{uri}name`` becomes ``name``; tags without a ``}`` are left alone.

    :param root: the element whose subtree (itself included) is rewritten.
    """
    # iter() replaces the deprecated getiterator() spelling.
    for elem in root.iter():
        tag = elem.tag
        # Only string-like tags can be sliced; nodes whose tag is not a
        # string (e.g. comments) are skipped.
        if not hasattr(tag, 'find'):
            continue
        brace = tag.find('}')
        if brace >= 0:
            elem.tag = tag[brace + 1:]
    # Let lxml tidy up the tree, including namespace declarations that are
    # no longer referenced after the renaming above.
    objectify.deannotate(root, cleanup_namespaces=True)
def cci2html(node, mapping, stack=None):
    """Yield, in document order, the HTML fragments that render ``node``.

    Joining the yielded strings gives the HTML for ``node``, its whole
    subtree, and the tail text that follows it.

    :param node: an element whose ``tag`` has already been stripped of any
        namespace prefix.
    :param mapping: dict from source tag name to HTML tag name. A value may
        also be a callable; it is called with the node and must return the
        HTML tag name. Tags missing from the mapping are rendered as ``??``.
    :param stack: list of the HTML tags currently open, innermost last. It is
        shared between the recursive calls; callers normally omit it.
    :returns: a generator of strings.

    Text and tail content is escaped (``&``, ``<``, ``>``) so that character
    data cannot be mistaken for markup in the output.
    """
    if stack is None:
        stack = []

    # Comments and processing instructions carry a factory function instead
    # of a name in ``tag``. They have no HTML counterpart, so only the text
    # that follows them is kept.
    if callable(node.tag):
        if node.tail:
            yield escape(node.tail)
        return

    opened = bool(node.tag)
    if opened:
        tag = mapping.get(node.tag, "??")
        if callable(tag):
            tag = tag(node)
        stack.append(tag)
        yield "<{}>".format(tag)

    if node.text:
        yield escape(node.text)

    # Iterating an element yields its direct children.
    for child in node:
        for fragment in cci2html(child, mapping, stack):
            yield fragment

    # Close only what this call opened; a tagless node must not pop (and
    # thereby close) its parent's tag.
    if opened:
        yield "</{}>".format(stack.pop())

    if node.tail:
        yield escape(node.tail)
def to_bold(e):
    """Return the HTML tag name for a bold node; ``e`` is not inspected."""
    return "b"
# Source tag name -> HTML tag name. A value may be the tag name itself or a
# callable that receives the node and returns the tag name.
cci_tags = dict(
    p="p",
    root="html",
    bold=to_bold,
    underline="u",
)
# Demo: parse the sample document, drop its namespaces, and print the HTML.
root = parse_xml(xml)
remove_namespaces(root)
# The parenthesised single-argument form is valid both as a Python 2 print
# statement and as a Python 3 print() call.
print("".join(cci2html(root, cci_tags)))
@prologic
Copy link
Author

Output:

$ python -i test.py 
<html><p>Body text <b>THISTEXTISBOLD </b>this text isnt<b><u>THISTEXTISBOLDANDUNDERLINED</u></b> Blah blah Blah blah Blah blah Blah blah Blah blah Blah blah Blah blah</p></html>
>>> 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment