Last active
June 8, 2017 23:53
-
-
Save mgd020/0c026bc51a37be3268dd9fcc6589f668 to your computer and use it in GitHub Desktop.
Convert between XML and JSON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import absolute_import, division, print_function, unicode_literals | |
from xml import sax | |
from xml.sax import saxutils | |
class XMLParser(sax.handler.ContentHandler):
    """SAX content handler that builds a JSON-compatible dict tree.

    Each element becomes a dict of its attributes. Child elements are stored
    under their tag name as a list of such dicts. Character data is collected
    under ``cdata_key`` and joined into a single string when the element
    closes; if the element also has child elements the collected text is
    discarded instead.

    cdata_key: key used to store character data. If falsy, text is ignored.

    After parsing, ``root`` holds ``{root_tag: [root_element_dict]}``.
    """

    def __init__(self, cdata_key):
        # Explicit base-class call rather than super(): ContentHandler is an
        # old-style class on Python 2, where super() does not work.
        sax.handler.ContentHandler.__init__(self)
        self.cdata_key = cdata_key
        self.root = {}
        # Stack of the dicts for the currently open elements; the bottom
        # entry is the synthetic root container.
        self.elem = [self.root]

    def startElement(self, name, attrs):  # NOQA
        elem = dict(attrs.items())
        # Repeated tags accumulate in one list under the parent.
        self.elem[-1].setdefault(name, []).append(elem)
        self.elem.append(elem)

    def endElement(self, name):  # NOQA
        elem = self.elem.pop()
        key = self.cdata_key
        if not key or key not in elem:
            return
        # Child elements are the only list-valued entries besides the text
        # chunks themselves (attribute values are plain strings).
        has_children = any(
            k != key and isinstance(v, list) for k, v in elem.items()
        )
        if has_children:
            # Text next to child elements (typically indentation whitespace)
            # is dropped.
            del elem[key]
        else:
            # The parser may deliver text in several chunks; join them.
            elem[key] = ''.join(elem[key])

    def characters(self, content):
        if self.cdata_key:
            self.elem[-1].setdefault(self.cdata_key, []).append(content)
def xml_to_json(stream, cdata_key):
    """
    Parse an XML document into JSON-compatible data.

    stream: the XML input to read from.
    cdata_key: key under which element text is stored. If None, text is
        not stored at all.

    Returns the dict built by the parser, keyed by the root tag name.
    """
    parser = XMLParser(cdata_key)
    sax.parse(stream, parser)
    return parser.root
class XMLGenerator(object):
    """Adds indent and emptyElement to xml.sax.saxutils.XMLGenerator.

    stream: the output stream. Newlines, indentation and empty elements are
        written to it directly as text, alongside whatever the wrapped
        generator writes.
    encoding: passed to the wrapped generator when not None.
    indent: number of spaces per nesting level. None or 0 disables
        indentation and the newlines between elements.

    Any attribute not defined here is delegated to the wrapped generator.
    """

    def __init__(self, stream, encoding=None, indent=None):
        generator_args = [stream]
        if encoding is not None:
            generator_args.append(encoding)
        self.stream = stream
        self.generator = saxutils.XMLGenerator(*generator_args)
        self.indent_level = 0
        # None means "no indentation"; normalise to 0 so the level
        # arithmetic in startElement/endElement never adds None.
        self.indent_width = indent or 0
        # True when the last thing written inside the current element was a
        # child element, so the closing tag belongs on its own line.
        self.nested_elements = False

    def __getattr__(self, name):
        # Only reached for attributes missing on the wrapper. Guard against
        # infinite recursion if the wrapped generator is not set yet.
        if name == 'generator':
            raise AttributeError(name)
        return getattr(self.generator, name)

    def endDocument(self):  # NOQA
        self.generator.endDocument()
        self.stream.write('\n')

    def startElement(self, name, attrs):  # NOQA
        # No leading newline for the root element.
        if self.indent_level:
            self.newline()
        self.generator.startElement(name, attrs)
        self.indent_level += self.indent_width
        self.nested_elements = False

    def emptyElement(self, name, attrs):  # NOQA
        """Write a self-closing ``<name .../>`` element."""
        # Same root-level guard as startElement, so an empty root element
        # is not preceded by a stray blank line.
        if self.indent_level:
            self.newline()
        attr_str = ''.join(
            ' {}={}'.format(key, saxutils.quoteattr(value))
            for key, value in attrs.items()
        )
        self.stream.write('<{}{}{}/>'.format(
            name, attr_str, ' ' if self.indent_width else ''))
        self.nested_elements = True

    def endElement(self, name):  # NOQA
        self.indent_level -= self.indent_width
        if self.nested_elements:
            # Children were written: put the closing tag on its own line.
            self.newline()
        else:
            # Text-only element: close on the same line. The parent now
            # contains a nested element.
            self.nested_elements = True
        self.generator.endElement(name)

    def characters(self, content):
        self.generator.characters(content)
        self.nested_elements = False

    def newline(self):
        """Write a newline plus the current indentation, if indenting."""
        if not self.indent_width:
            return
        self.stream.write('\n' + ' ' * self.indent_level)
def json_to_xml(data, stream, cdata_key, encoding=None, indent=None, empty=True):
    """
    Write XML to stream based on json compatible data.

    data: a dict with the same format that xml_to_json produces. Only the
        first key, and the first entry of its list, is used as the root
        element.
    stream: the output stream to write to.
    cdata_key: the key used to store CDATA.
    encoding: XML encoding
    indent: the number of spaces to indent. None or 0 means no indent or
        newlines.
    empty: enable empty elements in the XML. When False, elements without
        text or children are written as a start/end tag pair instead.

    Within an element, list values become child elements, the cdata_key
    value becomes its text (ignored when the element has children), and
    every other value becomes an attribute.

    Returns None. Nothing is written when data is empty.
    """
    if not data:
        return

    # dict views are not subscriptable on Python 3, so take the first item
    # through an iterator.
    root_name, root_values = next(iter(data.items()))

    # Explicit stack instead of recursion. A (name, dict) tuple is an
    # element still to be written; a bare name is an already opened element
    # waiting for its end tag.
    elements = [(root_name, root_values[0])]

    # Normalise None to 0 so the generator's indent arithmetic is numeric.
    generator = XMLGenerator(stream, encoding, indent or 0)
    generator.startDocument()

    while elements:
        element = elements.pop()
        if not isinstance(element, tuple):
            generator.endElement(element)
            continue

        name, content = element
        attrs = {}
        cdata = None
        children = []
        for key, value in content.items():
            if key == cdata_key:
                cdata = value
            elif isinstance(value, list):
                children.extend((key, child) for child in value)
            else:
                attrs[key] = value

        if empty and not cdata and not children:
            generator.emptyElement(name, attrs)
            continue

        generator.startElement(name, attrs)
        elements.append(name)
        if children:
            # Push in reverse so the stack pops children in the dict's own
            # order (tag groups in key order, items in list order).
            elements.extend(reversed(children))
        elif cdata:
            generator.characters(cdata)

    generator.endDocument()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment