Skip to content

Instantly share code, notes, and snippets.

@gbraccialli
Last active November 11, 2016 11:18
Show Gist options
  • Save gbraccialli/6dacfdb16717adfe50b64c7c02e716b6 to your computer and use it in GitHub Desktop.
Save gbraccialli/6dacfdb16717adfe50b64c7c02e716b6 to your computer and use it in GitHub Desktop.
import xml.etree.ElementTree as xml, sys
# Command line: <xml_file> [max_level] [show_attributes]
if len(sys.argv) < 2:
    # Without this guard a missing file argument surfaced as a bare
    # IndexError traceback from sys.argv[1].
    sys.exit('usage: ' + sys.argv[0] + ' <xml_file> [max_level] [show_attributes]')
file = sys.argv[1]
tree = xml.parse(file)
# Alternative kept from the original (requires the third-party lxml
# package), using a parser created with recover=True:
#   from lxml import etree
#   tree = etree.parse(file, parser=etree.XMLParser(recover=True))
root = tree.getroot()
# max_level: count_tree only descends into children while
# level < max_level; -1 disables the limit.
max_level = -1
# show_attributes: when true, count_tree also tallies attribute names.
show_attributes = True
if len(sys.argv) > 2 and sys.argv[2]:
    max_level = int(sys.argv[2])
if len(sys.argv) > 3 and sys.argv[3]:
    # Given as an integer flag: 0 disables, any other integer enables.
    show_attributes = bool(int(sys.argv[3]))
def count_tree(level, tag, tags):
    """Accumulate occurrence counts for ``tag`` and its descendants.

    level -- nesting depth of ``tag`` (the caller passes 0 for the root).
    tag   -- element to record; its ``.tag`` is used as the key and its
             ``.attrib`` and child elements are iterated.
    tags  -- dict updated in place, mapping tag name to a dict with
             'count', 'child' (same structure, one level deeper) and
             'attributes' (attribute name -> occurrence count).

    Reads the module-level ``show_attributes`` and ``max_level`` settings.
    Returns ``tags``.
    """
    name = tag.tag
    entry = tags.get(name)
    if entry is None:
        # First time this tag name is seen at this position in the tree.
        entry = tags[name] = {'count': 0, 'child': {}, 'attributes': {}}
    entry['count'] += 1

    if show_attributes:
        attr_counts = entry['attributes']
        for attr in tag.attrib:
            attr_counts[attr] = attr_counts.get(attr, 0) + 1

    # -1 means no depth limit; otherwise stop descending at max_level.
    descend = max_level == -1 or level < max_level
    if descend:
        for node in tag:
            entry['child'] = count_tree(level + 1, node, entry['child'])
    return tags
def print_level(level, tags):
    """Print the tag-count tree to stdout as an indented outline.

    Each tag is printed as ``<level><dashes><tag> : <count>`` with four
    dashes per nesting level. Its attributes follow one level deeper,
    prefixed with ``@``, and then its children are printed recursively.

    level -- nesting depth of the entries in ``tags`` (0 for the root).
    tags  -- mapping of tag name to a dict with 'count', 'attributes'
             and 'child' keys.
    """
    for tag in tags:
        # print(...) with one parenthesized argument gives the same output
        # on Python 2 and 3; the bare print statement is a SyntaxError on 3.
        print(str(level) + '-' * 4 * level + tag + ' : ' + str(tags[tag]['count']))
        for attribute in tags[tag]['attributes']:
            print(str(level+1) + '-' * 4 * (level+1) + '@' + attribute + ' : ' + str(tags[tag]['attributes'][attribute]))
        print_level(level+1, tags[tag]['child'])
def write_xml(file, level, tags):
    """Write the tag-count tree to ``file`` as nested XML elements.

    Every tag becomes ``<tag count="N">...</tag>``. Each of its attributes
    is written as an empty child element named ``_<attribute>`` carrying
    its own ``count``, followed by the tag's children, written recursively.

    file  -- object with a ``write`` method that receives the text.
    level -- nesting depth; only passed along (incremented) to the
             recursive call.
    tags  -- mapping of tag name to a dict with 'count', 'attributes'
             and 'child' keys.

    NOTE(review): names are written verbatim; ElementTree's ``{uri}local``
    form for namespaced names would not be a valid element name. Confirm
    the inputs are namespace-free.
    """
    emit = file.write
    for name, info in tags.items():
        emit(''.join(('<', name, ' count="', str(info['count']), '">')))
        for attr_name, attr_count in info['attributes'].items():
            emit(''.join(('<_', attr_name, ' count="', str(attr_count), '"/>')))
        write_xml(file, level + 1, info['child'])
        emit(''.join(('</', name, '>')))
# Build the tag/attribute count tree starting from the document root.
# (Named `structure` rather than `all` to avoid shadowing the builtin.)
structure = count_tree(0, root, {})
#print(structure)
# Show the outline on stdout once; the original called print_level twice
# in a row, which duplicated the whole listing.
print_level(0, structure)
# Write the same tree as XML next to the input file. The with-block
# closes the file even if write_xml raises.
with open(file + '_xml_structure.xml', 'w') as outxml:
    write_xml(outxml, 0, structure)
# Alternative dumps kept from the original (dicttoxml is third-party):
#import json
#with open('result.json', 'w') as fp:
#    json.dump(structure, fp)
#import dicttoxml
#xml = dicttoxml.dicttoxml(structure)
#print(xml)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment