ZhanruiLiang · January 20, 2014 17:21
diff --git a/test.xml b/test.xml
 <?xml version="1.0" ?>
 <project>
  <tag1 a1="v1" a2="v2" a3="v3">
    <ctag1>
      hello
    </ctag1>
    <ctag2>
      world
    </ctag2>
  </tag1>
  <tag2>
    bravo!
  </tag2>
 </project>
diff --git a/test.xmlvis b/test.xmlvis
 project:
  tag1[a1 = v1, a2 = v2, a3 = v3]:
    ctag1: hello
    ctag2: world
  tag2: bravo!
diff --git a/xmlvis.py b/xmlvis.py
 #!/usr/bin/env python
 import lxml.etree as E

 INLINE_ATTRS = True

 def strip_tag(tag):
    """
    Return `tag` without its '{namespace}' qualifier.

    Everything up to and including the first '}' that follows the first '{'
    is dropped.  A tag with no '{', or with a '{' that is never closed, is
    returned unchanged.
    """
    _, opening, after_brace = tag.partition('{')
    if not opening:
        return tag
    _, closing, local_name = after_brace.partition('}')
    return local_name if closing else tag

 def convert_node(node, indents):
    """
    Render an element and its descendants as indented outline lines.

    This is a generator: it yields one output line at a time, without a
    trailing newline.  Tag names are passed through strip_tag().

    A node with no attributes, no child elements and at most one non-blank
    line of text is rendered on a single line:

        ctag1: hello

    Any other node gets a header line, followed by its child elements (one
    level deeper) and then its own non-blank text lines.  With INLINE_ATTRS
    enabled the attributes are listed, sorted, on the header line:

        point[x = 2, y = 3]:
          Good point

    With INLINE_ATTRS disabled each attribute goes on its own line,
    prefixed with '@':

        point:
          @x: 2
          @y: 3
          Good point

    Children whose ``tag`` is not a string (comments, processing
    instructions) are skipped.  Only ``node.text`` is rendered; text that
    follows a child element (its ``tail``) is not.

    Arguments:
        node: element providing ``tag``, ``text``, ``attrib`` and iteration
            over its children.
        indents: nesting depth; each level indents by two spaces.
    """
    indent = '  ' * indents
    tag = strip_tag(node.tag)

    # Non-blank text lines, each stripped of surrounding whitespace.
    contents = []
    if node.text:
        stripped = (line.strip() for line in node.text.split('\n'))
        contents = [line for line in stripped if line]

    # Comments and processing instructions expose a non-string `tag`, which
    # strip_tag() cannot handle; only real elements are rendered.
    children = [child for child in node if isinstance(child.tag, str)]

    # If the whole node is short enough, we display it inline.
    if not node.attrib and not children and len(contents) <= 1:
        line = contents[0] if contents else ''
        yield '{indent}{tag}: {line}'.format(indent=indent, tag=tag, line=line)
        return

    if INLINE_ATTRS:
        # Display tag name and attributes on the first line.
        attrs = ', '.join(sorted(
            '{} = {}'.format(attr, value.replace('\n', ' '))
            for attr, value in node.attrib.items()))
        template = '{indent}{tag}[{attrs}]:' if attrs else '{indent}{tag}:'
        yield template.format(indent=indent, tag=tag, attrs=attrs)
    else:
        # Display tag name on a line and attributes on other lines.
        yield '{indent}{tag}:'.format(indent=indent, tag=tag)
        for attr, value in node.attrib.items():
            value = ' '.join(map(str.strip, value.split('\n')))
            yield '{indent}  @{attr}: {value}'.format(
                indent=indent, attr=attr, value=value)
    # Display children
    for child in children:
        yield from convert_node(child, indents + 1)
    # Display node text
    for line in contents:
        yield '{indent}  {line}'.format(indent=indent, line=line)

 def convert(input):
    """
    Convert an XML document to its outline text.

    Arguments:
        input: either a path (str) to an XML file, or an object with a
            read() method returning the document as str or bytes.

    Returns:
        The outline as a single string, lines joined with '\\n' and no
        trailing newline.

    A file opened here is always closed, even if reading fails.  A file
    object supplied by the caller is left open.
    """
    if isinstance(input, str):
        # Read raw bytes so the parser can honour the encoding named in the
        # XML declaration instead of the locale's default text encoding.
        with open(input, 'rb') as infile:
            data = infile.read()
    else:
        data = input.read()
    if isinstance(data, str):
        # The parser is handed bytes; text input is encoded as UTF-8.
        data = data.encode('utf-8')
    root = E.fromstring(data)
    return '\n'.join(convert_node(root, 0))

 if __name__ == '__main__':
    # Command line: INPUT [OUTPUT].  Without OUTPUT the result goes to stdout.
    import sys
    if len(sys.argv) < 2:
        sys.exit('usage: {} INPUT [OUTPUT]'.format(sys.argv[0]))
    # Convert before opening the output so that a parse failure cannot leave
    # a truncated output file behind.  convert() opens and closes the input.
    text = convert(sys.argv[1])
    if len(sys.argv) >= 3:
        with open(sys.argv[2], 'w') as outfile:
            outfile.write(text)
    else:
        sys.stdout.write(text)
	<?xml version="1.0" ?>
	<project>
	<tag1 a1="v1" a2="v2" a3="v3">
	<ctag1>
	hello
	</ctag1>
	<ctag2>
	world
	</ctag2>
	</tag1>
	<tag2>
	bravo!
	</tag2>
	</project>
	project:
	tag1[a1 = v1, a2 = v2, a3 = v3]:
	ctag1: hello
	ctag2: world
	tag2: bravo!
	#!/usr/bin/env python
	import lxml.etree as E

	INLINE_ATTRS = True

	def strip_tag(tag):
	    """
	    Return `tag` without its '{namespace}' qualifier.

	    Everything up to and including the first '}' that follows the first '{'
	    is dropped.  A tag with no '{', or with a '{' that is never closed, is
	    returned unchanged.
	    """
	    left = tag.find('{')
	    if left < 0:
	        return tag
	    # find() returns -1 when no '}' follows, so the slice below then
	    # starts at 0 and yields the whole tag.
	    right = tag.find('}', left)
	    return tag[right + 1:]

	def convert_node(node, indents):
	    """
	    Render an element and its descendants as indented outline lines.

	    This is a generator: it yields one output line at a time, without a
	    trailing newline.  Tag names are passed through strip_tag().

	    A node with no attributes, no child elements and at most one non-blank
	    line of text is rendered on a single line:

	        ctag1: hello

	    Any other node gets a header line, followed by its child elements (one
	    level deeper) and then its own non-blank text lines.  With INLINE_ATTRS
	    enabled the attributes are listed, sorted, on the header line:

	        point[x = 2, y = 3]:
	          Good point

	    With INLINE_ATTRS disabled each attribute goes on its own line,
	    prefixed with '@':

	        point:
	          @x: 2
	          @y: 3
	          Good point

	    Children whose ``tag`` is not a string (comments, processing
	    instructions) are skipped.  Only ``node.text`` is rendered; text that
	    follows a child element (its ``tail``) is not.

	    Arguments:
	        node: element providing ``tag``, ``text``, ``attrib`` and iteration
	            over its children.
	        indents: nesting depth; each level indents by two spaces.
	    """
	    indent = '  ' * indents
	    tag = strip_tag(node.tag)

	    # Non-blank text lines, each stripped of surrounding whitespace.
	    contents = []
	    if node.text:
	        stripped = (line.strip() for line in node.text.split('\n'))
	        contents = [line for line in stripped if line]

	    # Comments and processing instructions expose a non-string `tag`, which
	    # strip_tag() cannot handle; only real elements are rendered.
	    children = [child for child in node if isinstance(child.tag, str)]

	    # If the whole node is short enough, we display it inline.
	    if not node.attrib and not children and len(contents) <= 1:
	        line = contents[0] if contents else ''
	        yield '{indent}{tag}: {line}'.format(indent=indent, tag=tag, line=line)
	        return

	    if INLINE_ATTRS:
	        # Display tag name and attributes on the first line.
	        attrs = ', '.join(sorted(
	            '{} = {}'.format(attr, value.replace('\n', ' '))
	            for attr, value in node.attrib.items()))
	        template = '{indent}{tag}[{attrs}]:' if attrs else '{indent}{tag}:'
	        yield template.format(indent=indent, tag=tag, attrs=attrs)
	    else:
	        # Display tag name on a line and attributes on other lines.
	        yield '{indent}{tag}:'.format(indent=indent, tag=tag)
	        for attr, value in node.attrib.items():
	            value = ' '.join(map(str.strip, value.split('\n')))
	            yield '{indent}  @{attr}: {value}'.format(
	                indent=indent, attr=attr, value=value)
	    # Display children
	    for child in children:
	        yield from convert_node(child, indents + 1)
	    # Display node text
	    for line in contents:
	        yield '{indent}  {line}'.format(indent=indent, line=line)

	def convert(input):
	    """
	    Convert an XML document to its outline text.

	    Arguments:
	        input: either a path (str) to an XML file, or an object with a
	            read() method returning the document as str or bytes.

	    Returns:
	        The outline as a single string, lines joined with '\\n' and no
	        trailing newline.

	    A file opened here is always closed, even if reading fails.  A file
	    object supplied by the caller is left open.
	    """
	    if isinstance(input, str):
	        # Read raw bytes so the parser can honour the encoding named in the
	        # XML declaration instead of the locale's default text encoding.
	        with open(input, 'rb') as infile:
	            data = infile.read()
	    else:
	        data = input.read()
	    if isinstance(data, str):
	        # The parser is handed bytes; text input is encoded as UTF-8.
	        data = data.encode('utf-8')
	    root = E.fromstring(data)
	    return '\n'.join(convert_node(root, 0))

	if __name__ == '__main__':
	    # Command line: INPUT [OUTPUT].  Without OUTPUT the result goes to stdout.
	    import sys
	    if len(sys.argv) < 2:
	        sys.exit('usage: {} INPUT [OUTPUT]'.format(sys.argv[0]))
	    # Convert before opening the output so that a parse failure cannot leave
	    # a truncated output file behind.  convert() opens and closes the input.
	    text = convert(sys.argv[1])
	    if len(sys.argv) >= 3:
	        with open(sys.argv[2], 'w') as outfile:
	            outfile.write(text)
	    else:
	        sys.stdout.write(text)