Created
November 21, 2011 07:22
-
-
Save provegard/1381912 to your computer and use it in GitHub Desktop.
ElementTree parsing for non-prefixed attributes with default namespace
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from xml.etree import ElementTree as ET | |
from cStringIO import StringIO | |
def parse_attrns(file):
    """Parse *file* into an ElementTree, qualifying non-prefixed attributes.

    Every attribute whose name carries no namespace, on an element whose
    tag does carry one, is renamed to ``{uri}name`` where ``uri`` is the
    namespace of the element it belongs to. Elements whose tag has no
    namespace are left untouched.

    Args:
        file: Source handed straight to ``ET.iterparse``.

    Returns:
        An ``ET.ElementTree`` wrapping the first element reported by the
        parser (the document root).
    """
    root = None
    # Only "start" events are requested; attributes are already populated
    # when they fire.
    for _event, elem in ET.iterparse(file, ("start",)):
        if root is None:
            root = elem
        if "}" not in elem.tag:
            # Element has no namespace, so there is nothing to inherit.
            continue
        # Tag looks like "{uri}local": inherit the uri from the element.
        uri, _local = elem.tag[1:].rsplit("}", 1)
        # Iterate over a snapshot: the loop deletes and inserts keys, which
        # must not happen while walking a live dict view.
        for name, value in list(elem.attrib.items()):
            if name[:1] != "{":
                # Replace the old attribute with a namespace-qualified one.
                del elem.attrib[name]
                elem.attrib["{%s}%s" % (uri, name)] = value
    return ET.ElementTree(root)
class fileobj(object):
    """Write-only file-like adapter that collects written chunks in a list.

    The instance exposes a single ``write`` attribute, which is the bound
    ``append`` method of the list passed to the constructor.
    """

    def __init__(self, l):
        # Bind once so every write(chunk) call is simply l.append(chunk).
        append_chunk = l.append
        self.write = append_chunk
def roundtrip(xml, dns=None):
    """Parse an XML string with ``parse_attrns`` and serialize it back.

    ``dns`` is forwarded to ``ElementTree.write`` as ``default_namespace``.

    >>> roundtrip('<a xmlns="foo"><b id="1" /></a>', dns='foo')
    '<a xmlns="foo"><b id="1" /></a>'
    >>> roundtrip('<a><b id="1" /></a>', dns='foo')
    Traceback (most recent call last):
    ...
    ValueError: cannot use non-qualified names with default_namespace option
    >>> roundtrip('<a><b id="1" /></a>')
    '<a><b id="1" /></a>'
    """
    # parse_attrns is used here in place of a plain ET.parse so that
    # non-prefixed attributes get qualified before serialization.
    tree = parse_attrns(StringIO(xml))
    chunks = []
    sink = fileobj(chunks)
    tree.write(sink, default_namespace=dns)
    return "".join(chunks)
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To make this work in Python3:
And