Created
November 5, 2015 15:24
-
-
Save jamiejackson/a37e8d3dacb33b2dcbc1 to your computer and use it in GitHub Desktop.
Preserve Comments During XML Parsing in Python 2.7+
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from xml.etree import ElementTree | |
class _CommentForwarder(object):
    """Parser-target adapter that replays XML comments as Comment elements.

    The parser delivers comments to a target's optional ``comment`` callback.
    This adapter turns each one into a ``start``/``data``/``end`` triple
    tagged with ``ElementTree.Comment`` on the wrapped target, which is what
    makes the comment show up in the tree and be written back out.

    Any attribute not defined here is looked up on the wrapped target, so the
    parser still sees that target's own ``data``, ``close``, ... callbacks.
    """

    def __init__(self, target):
        self._target = target
        # Number of currently open elements; 0 means "outside the root".
        self._depth = 0

    def __getattr__(self, name):
        # Only reached for names this adapter does not define itself.
        if name in ("_target", "_depth"):
            # Not initialised yet: avoid recursing back into __getattr__.
            raise AttributeError(name)
        return getattr(self._target, name)

    def start(self, tag, attrs):
        self._depth += 1
        return self._target.start(tag, attrs)

    def end(self, tag):
        self._depth -= 1
        return self._target.end(tag)

    def comment(self, data):
        # A comment outside the root element has no parent to attach to;
        # replaying it would create a second top-level node, so skip it.
        if not self._depth:
            return
        self._target.start(ElementTree.Comment, {})
        self._target.data(data)
        self._target.end(ElementTree.Comment)


class CommentedTreeBuilder(ElementTree.XMLParser):
    """XML parser that keeps comments inside the root element in the tree.

    Pass an instance as the ``parser`` argument of ``ElementTree.parse`` /
    ``ElementTree.fromstring``. Use a new instance for every document.

    This uses only the public target protocol (the ``comment`` callback)
    instead of ``XMLTreeBuilder`` and the private ``_parser``/``_target``
    attributes, which are not available on newer Python versions.

    Args:
        html: Accepted for backward compatibility only; it is not forwarded
            to the underlying parser.
        target: Tree-building target that receives the parse events.
            Defaults to a fresh ``ElementTree.TreeBuilder``.
    """

    def __init__(self, html=0, target=None):
        if target is None:
            target = ElementTree.TreeBuilder()
        ElementTree.XMLParser.__init__(self, target=_CommentForwarder(target))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import xml.etree.ElementTree as ET | |
from CommentedTreeBuilder import CommentedTreeBuilder | |
# Comment-preserving parser handed to ET.parse() below.
parser = CommentedTreeBuilder()

if __name__ == '__main__':
    filename = "/opt/lucee/tomcat/conf/server.xml"

    # this is the important part: use the comment-preserving parser
    tree = ET.parse(filename, parser)

    # get the node to add a child to
    engine_node = tree.find("./Service/Engine")
    if engine_node is None:
        # find() returns None when nothing matches; stop with a clear
        # message instead of failing inside ET.SubElement().
        raise SystemExit("No ./Service/Engine element found in %s" % filename)

    # add a node: Engine.Host
    host_node = ET.SubElement(
        engine_node,
        "Host",
        name="local.mysite.com",
        appBase="webapps"
    )

    # add a child to new node: Engine.Host.Context
    ET.SubElement(
        host_node,
        'Context',
        path="",
        docBase="/path/to/doc/base"
    )

    # write the modified tree (with its comments) to a new file
    tree.write('out.xml')
ElementTree.XMLTreeBuilder doesn't exist anymore in 3.5.2. It looks like it has been split up, and the class we have to inherit from may be XMLParser.
# Attempted Python 3 port: subclasses ET.XMLParser instead of XMLTreeBuilder
# and assigns handle_comment to self._parser.CommentHandler.
# NOTE(review): the author reports below that this attempt fails with
# "AttributeError: 'CommentedTreeBuilder' object has no attribute '_parser'".
class CommentedTreeBuilder ( ET.XMLParser ):
def __init__ ( self, html = 0, target = None, encoding = None ):
ET.XMLParser.__init__( self, html, target, encoding )
# This is the line that raises the reported AttributeError.
self._parser.CommentHandler = self.handle_comment
def handle_comment ( self, data ):
# Replay the comment on the target as a start/data/end triple tagged ET.Comment.
self._target.start( ET.Comment, {} )
self._target.data( data )
self._target.end( ET.Comment )
However I get an error after trying this out:
self._parser.CommentHandler = self.handle_comment
AttributeError: 'CommentedTreeBuilder' object has no attribute '_parser'
I don't understand why it does that, because I looked at ElementTree.py and the attribute is clearly defined there...
I tried without the underscore, same thing.
Actually, none of the properties from XMLParser are available, and testing print(str(self.entity))
even crashes Python (Windows 10, 64 bits).
I wonder if I did anything wrong?
Edit:
This solved the problem for me.
# http://stackoverflow.com/questions/33573807/faithfully-preserve-comments-in-parsed-xml-python-2-7
class CommentedTreeBuilder(ET.TreeBuilder):
    """TreeBuilder target that keeps XML comments as ``ET.Comment`` nodes.

    Use it as the ``target`` of an ``ET.XMLParser``. The parser calls
    ``comment()`` for every comment it encounters; each comment found inside
    the root element is replayed as a ``start``/``data``/``end`` triple
    tagged with ``ET.Comment`` so that it ends up in the tree and is
    written back out on serialization.

    Comments before or after the root element are ignored: replaying them
    would create an extra top-level node, which (depending on the
    TreeBuilder implementation) either raises "multiple elements on top
    level" or displaces the real root element.
    """

    def __init__(self, *args, **kwargs):
        super(CommentedTreeBuilder, self).__init__(*args, **kwargs)
        # Number of currently open elements; 0 means "outside the root".
        self._depth = 0

    def start(self, tag, attrs):
        self._depth += 1
        return super(CommentedTreeBuilder, self).start(tag, attrs)

    def end(self, tag):
        self._depth -= 1
        return super(CommentedTreeBuilder, self).end(tag)

    def comment(self, data):
        if not self._depth:
            # No enclosing element to attach the comment to.
            return
        # start()/end() are balanced, so the depth counter is unchanged.
        self.start(ET.Comment, {})
        self.data(data)
        self.end(ET.Comment)
#------------------------------------------------------------------------------
# Kept so existing references to ``cparser`` keep working. A parser instance
# is finished after one document, so read_xml_file() no longer shares it.
cparser = ET.XMLParser(target=CommentedTreeBuilder())


def read_xml_file(f):
    """Parse *f* into an ElementTree while preserving XML comments.

    Args:
        f: Whatever ``ET.parse`` accepts as its source (it is passed through
            unchanged).

    Returns:
        The value returned by ``ET.parse``.
    """
    # Build a new parser and target per call: reusing one parser for a
    # second document fails because the first parse already finished it.
    return ET.parse(f, parser=ET.XMLParser(target=CommentedTreeBuilder()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Got the parser from Amaury Forgeot d'Arc