Skip to content

Instantly share code, notes, and snippets.

@vascoosx
Last active September 17, 2021 15:04
Show Gist options
  • Save vascoosx/c2c2dee595595cc77d22f2d1f5777a8b to your computer and use it in GitHub Desktop.
Save vascoosx/c2c2dee595595cc77d22f2d1f5777a8b to your computer and use it in GitHub Desktop.
custom sorting xml
# -*- coding: utf-8 -*-
"""
Created on Tue May 24 06:12:11 2016
@author: sh.otsuka
"""
# experiment based on http://stackoverflow.com/a/8387132/1635993
from lxml import etree
# Sample document used by the sorting experiments below; the children of
# <X>, <A> and <K> (and the ids of the <R> elements) appear out of order.
data = """<X>
<X03>3</X03>
<X02>2</X02>
<A>
<A02>Y</A02>
<A01>X</A01>
<A03>Z</A03>
<K>
<J1>3</J1>
<J3>5</J3>
<J2>4</J2>
</K>
<K>
<J3>1</J3>
<J1>2</J1>
<J2>3</J2>
</K>
<R id="3">1</R>
<R id="1">2</R>
<R id="2">3</R>
</A>
<X01>1</X01>
<B>
<B01>Z</B01>
<B02>X</B02>
<B03>C</B03>
</B>
</X>"""

# Parse into an element tree; the parser is configured to discard
# whitespace-only text between tags.
doc = etree.XML(
    data,
    parser=etree.XMLParser(remove_blank_text=True),
)
#%% sort whole node
def parentsort(apath, keyformula, root=None):
    """Reorder all children of every element matched by an XPath.

    Args:
        apath: XPath expression selecting the elements whose children
            are to be reordered.
        keyformula: One-argument callable returning the sort key for a
            child.
        root: Object whose ``xpath`` method evaluates *apath*. When
            omitted, the module-level ``doc`` is used.

    Returns:
        None. Each matched element is modified in place.
    """
    tree = doc if root is None else root
    for element in tree.xpath(apath):
        # Slice assignment swaps the full child list for the sorted one.
        element[:] = sorted(element, key=keyformula)
#%% partial sort
def siblingsort(parentpath, childTagFormula, keyformula, root=None):
    """Sort the run of children between the first and last selected child.

    For every element matched by *parentpath*, the children accepted by
    *childTagFormula* are located and the contiguous slice running from
    the first accepted child through the last accepted child is reordered
    by *keyformula*. Children that sit between those two positions belong
    to the slice and are sorted with it, even if they were not accepted
    themselves. Children outside the slice keep their positions.

    Args:
        parentpath: XPath expression selecting the parent elements.
        childTagFormula: One-argument predicate; a truthy result marks a
            child as selected.
        keyformula: One-argument callable returning the sort key for a
            child.
        root: Object whose ``xpath`` method evaluates *parentpath*. When
            omitted, the module-level ``doc`` is used.

    Returns:
        None. Each matched parent is modified in place.
    """
    tree = doc if root is None else root
    for parent in tree.xpath(parentpath):
        target_index = [
            i for i, child in enumerate(parent) if childTagFormula(child)
        ]
        if not target_index:
            # No selected child under this parent: there is no slice to
            # sort, and min()/max() would raise ValueError on an empty list.
            continue
        # enumerate() yields ascending indices, so the ends are min and max.
        s, f = target_index[0], target_index[-1] + 1
        parent[s:f] = sorted(parent[s:f], key=keyformula)
#%% sort A0N
# Within every <A>, order the children whose tag starts with 'A' by tag.
parpath = '//A'


def ct1(node):
    return node.tag[0] == 'A'


def k1(node):
    return node.tag


siblingsort(parpath, ct1, k1)

#%% sort K
# Order all children of every <K> directly under an <A> by tag.
kpath = '//A/K'


def k2(node):
    return node.tag


parentsort(kpath, k2)

#%% sort R
# Within every <A>, order the <R> children by their 'id' attribute value.
def ct2(node):
    return node.tag == 'R'


def k3(node):
    return node.attrib['id']


siblingsort(parpath, ct2, k3)

#%% sort X0N
# First sort every child of <X> by tag, then sort the slice covering the
# children whose tag starts with 'X'.
parentsort('//X', lambda child: child.tag)  # somehow not necessary


def ct3(node):
    return node.tag[0] == 'X'


def k4(node):
    return node.tag


siblingsort('//X', ct3, k4)
#%% pretty print xml
# The Python 2 print statement is a SyntaxError on Python 3, so print is
# called as a function. encoding="unicode" makes lxml return text instead
# of bytes, so the document itself is printed rather than a b'...' repr.
print(etree.tostring(doc, pretty_print=True, encoding="unicode"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment