Skip to content

Instantly share code, notes, and snippets.

@jdkirkwood
Last active October 6, 2021 17:20
Show Gist options
  • Save jdkirkwood/726778ad2b6b58e1db379dc48b96d907 to your computer and use it in GitHub Desktop.
Save jdkirkwood/726778ad2b6b58e1db379dc48b96d907 to your computer and use it in GitHub Desktop.
XML sort script
#!python3
# https://stackoverflow.com/a/66596507/7643972
import os
import sys
from lxml import etree
# Command-line handling: the input path is required, the output path is optional.
if len(sys.argv) < 2:
    print("usage : sort_xml.py file_in.xml [file_in_sorted.xml]")
    # sys.exit() is always available, unlike the exit() helper that the site
    # module injects for interactive use; a missing argument is reported as a
    # failure (non-zero status) so calling scripts can detect it.
    sys.exit(1)

filename_in = sys.argv[1]
# Use the explicit output path when one is given (and non-empty), even if
# extra arguments follow it; otherwise derive "<input without ext>_sorted.xml".
if len(sys.argv) >= 3 and sys.argv[2]:
    filename_out = sys.argv[2]
else:
    filename_out = f"{os.path.splitext(filename_in)[0]}_sorted.xml"
def getSortValue(elem):
    """Return the string key used to order *elem* among its siblings.

    Regular elements are keyed by their tag name followed by the value of
    their ``id`` attribute (empty when the attribute is absent). Nodes whose
    ``tag`` is not a string -- in lxml these are comments, processing
    instructions and entity references -- are keyed by their text content.

    Args:
        elem: A node exposing ``tag``, ``attrib`` and ``text``.

    Returns:
        A ``str`` in every case, so keys of different node kinds can be
        compared with each other while sorting.
    """
    if not isinstance(elem.tag, str):
        # Special nodes carry a factory function as their tag, which cannot be
        # concatenated with a string; their text may also be None.
        return elem.text or ""
    # Sort elements by tag and then by their "id" attribute.
    return elem.tag + elem.attrib.get("id", "")
# Parse the input while keeping CDATA sections instead of flattening them.
parser = etree.XMLParser(strip_cdata=False)
doc = etree.parse(filename_in, parser=parser)

# Every element that has at least one element child gets its children
# reordered in place according to getSortValue.
parents_with_children = doc.xpath("//*[./*]")
for node in parents_with_children:
    ordered_children = sorted(node, key=getSortValue)
    node[:] = ordered_children

# tostring() yields bytes, hence the binary write mode.
with open(filename_out, "wb") as out_file:
    serialized = etree.tostring(doc, pretty_print=True)
    out_file.write(serialized)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment