Created
January 20, 2014 17:21
-
-
Save ZhanruiLiang/8524482 to your computer and use it in GitHub Desktop.
Convert XML to a human-readable format with proper visual indentation and without closing tags or angle brackets.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" ?>
<project>
  <tag1 a1="v1" a2="v2" a3="v3">
    <ctag1>
      hello
    </ctag1>
    <ctag2>
      world
    </ctag2>
  </tag1>
  <tag2>
    bravo!
  </tag2>
</project>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
project:
 tag1[a1 = v1, a2 = v2, a3 = v3]:
  ctag1: hello
  ctag2: world
 tag2: bravo!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import lxml.etree as E | |
# Controls how convert_node renders attributes:
#   True  -> on the tag line itself, e.g. "tag[a = 1, b = 2]:"
#   False -> one "@name: value" line per attribute below the tag line
INLINE_ATTRS = True
def strip_tag(tag):
    """
    Return `tag` without its '{...}' qualifier.

    Everything up to and including the first '}' that follows the first '{'
    is dropped, so '{http://example.org/ns}item' becomes 'item'. A tag
    without '{', or whose '{' is never closed, is returned unchanged.
    """
    _, opening, after_open = tag.partition('{')
    if not opening:
        # No qualifier at all.
        return tag
    _, closing, local_name = after_open.partition('}')
    # An unterminated '{' leaves the tag untouched.
    return local_name if closing else tag
def convert_node(node, indents):
    """
    Render an element and its descendants as indented text lines.

    This is a generator: it yields one output line at a time, without a
    trailing newline. `indents` is the nesting depth of `node`; every level
    adds one space of indentation.

    Layout rules:

    * An element with no attributes, no child elements and at most one
      non-blank line of text is rendered on a single line: "tag: text".
    * Otherwise the tag line comes first, followed by the child elements
      and then the element's own non-blank text lines, one level deeper.
    * With INLINE_ATTRS enabled the attributes are listed, sorted, in
      brackets on the tag line; with it disabled each attribute gets its
      own line starting with '@'.

    For example, <point x="2" y="3"> Good point </point> is rendered as

        point[x = 2, y = 3]:
         Good point

    with INLINE_ATTRS enabled, and as

        point:
         @x: 2
         @y: 3
         Good point

    with it disabled.

    Tag names are passed through strip_tag. Comments, processing
    instructions and entities are skipped. Text that follows a closing tag
    (the node's tail) is not rendered.
    """
    # lxml also hands out comments, processing instructions and entities as
    # nodes; their `tag` is a factory function rather than a string, which
    # strip_tag cannot handle. They carry no data to display, so skip them.
    if not isinstance(node.tag, str):
        return

    indent = ' ' * indents
    tag = strip_tag(node.tag)

    # Keep only the non-blank lines of the element's own text.
    stripped_lines = (line.strip() for line in (node.text or '').split('\n'))
    contents = [line for line in stripped_lines if line]

    # Only real elements count as children (see the note above).
    children = [child for child in node if isinstance(child.tag, str)]

    # If the whole node is short enough, we display it inline.
    if not node.attrib and not children and len(contents) <= 1:
        line = contents[0] if contents else ''
        yield '{indent}{tag}: {line}'.format(indent=indent, tag=tag, line=line)
        return

    if INLINE_ATTRS:
        # Display tag name and attributes on the first line.
        attrs = ', '.join(sorted(
            '{} = {}'.format(attr, value.replace('\n', ' '))
            for attr, value in node.attrib.items()))
        template = '{indent}{tag}[{attrs}]:' if attrs else '{indent}{tag}:'
        yield template.format(indent=indent, tag=tag, attrs=attrs)
    else:
        # Display tag name on a line and attributes on other lines.
        yield '{indent}{tag}:'.format(indent=indent, tag=tag)
        for attr, value in node.attrib.items():
            # Collapse multi-line attribute values onto a single line.
            value = ' '.join(part.strip() for part in value.split('\n'))
            yield '{indent} @{attr}: {value}'.format(
                indent=indent, attr=attr, value=value)

    # Display children
    for child in children:
        yield from convert_node(child, indents + 1)

    # Display node text
    for line in contents:
        yield '{indent} {line}'.format(indent=indent, line=line)
def convert(input):
    """
    Parse an XML document and return its rendering as a single string.

    `input` is either the path of an XML file (str) or an object with a
    read() method returning the document as str or bytes. A file opened
    here is always closed again, even when reading fails; a file object
    supplied by the caller is left open.

    The result is the lines produced by convert_node for the root element,
    joined with newlines and without a trailing newline. Parse errors from
    lxml propagate to the caller.
    """
    if isinstance(input, str):
        with open(input, 'r') as infile:
            data = infile.read()
    else:
        data = input.read()

    # Text is handed to lxml as UTF-8 bytes; data read from a binary-mode
    # file object is already bytes and is passed through untouched.
    if isinstance(data, str):
        data = data.encode('utf-8')

    root = E.fromstring(data)
    return '\n'.join(convert_node(root, 0))
if __name__ == '__main__':
    # Usage: script INPUT.xml [OUTPUT]
    # Without OUTPUT the result is written to standard output.
    import sys

    if len(sys.argv) < 2:
        sys.exit('usage: {} INPUT.xml [OUTPUT]'.format(sys.argv[0]))

    # Passing the path lets convert() open and close the input file itself.
    result = convert(sys.argv[1])

    # The output file is only created once the conversion has succeeded,
    # and is closed as soon as the result has been written.
    if len(sys.argv) >= 3:
        with open(sys.argv[2], 'w') as outfile:
            outfile.write(result)
    else:
        sys.stdout.write(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment