Last active
September 17, 2021 15:04
-
-
Save vascoosx/c2c2dee595595cc77d22f2d1f5777a8b to your computer and use it in GitHub Desktop.
custom sorting xml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Tue May 24 06:12:11 2016
@author: sh.otsuka
"""
# experiment based on http://stackoverflow.com/a/8387132/1635993
from lxml import etree

# Sample document whose child elements are out of order at several levels
# (X0N under <X>, A0N / <R id=...> under <A>, JN under each <K>), giving the
# sorting helpers below something to reorder.
data = """<X>
<X03>3</X03>
<X02>2</X02>
<A>
<A02>Y</A02>
<A01>X</A01>
<A03>Z</A03>
<K>
<J1>3</J1>
<J3>5</J3>
<J2>4</J2>
</K>
<K>
<J3>1</J3>
<J1>2</J1>
<J2>3</J2>
</K>
<R id="3">1</R>
<R id="1">2</R>
<R id="2">3</R>
</A>
<X01>1</X01>
<B>
<B01>Z</B01>
<B02>X</B02>
<B03>C</B03>
</B>
</X>"""

# Parse into the module-level tree that parentsort/siblingsort operate on.
# remove_blank_text=True asks the parser to discard whitespace-only text
# between elements (here: the newlines separating the tags in `data`).
doc = etree.XML(data, etree.XMLParser(remove_blank_text=True))
#%% sort whole node
def parentsort(apath, keyformula):
    """Reorder every child of each element selected by ``apath``.

    apath      -- XPath expression evaluated against the module-level ``doc``.
    keyformula -- key function called with a child element; its return value
                  determines the child's position after sorting.

    The matched elements are modified in place; nothing is returned.
    """
    for node in doc.xpath(apath):
        children = list(node)
        children.sort(key=keyformula)
        # Slice assignment replaces the element's children with the
        # reordered list.
        node[:] = children
#%% partial sort
def siblingsort(parentpath, childTagFormula, keyformula):
    """Sort one run of siblings under each element selected by ``parentpath``.

    parentpath      -- XPath expression evaluated against the module-level
                       ``doc``; selects the parent elements.
    childTagFormula -- predicate called with a child element; children for
                       which it is true mark the run to be sorted.
    keyformula      -- key function called with a child element; used to
                       order the run.

    The run extends from the first matching child to the last matching child,
    inclusive, so any non-matching children lying between them are sorted
    along with the matching ones. Children outside that span keep their
    positions. Parents with no matching child are left untouched.

    The matched parents are modified in place; nothing is returned.
    """
    for parent in doc.xpath(parentpath):
        target_index = [i for i, child in enumerate(parent)
                        if childTagFormula(child)]
        if not target_index:
            # No child matched: there is no run to sort, and taking
            # min()/max() of an empty list would raise ValueError.
            continue
        # enumerate() yields ascending indices, so the first and last
        # entries are the minimum and maximum.
        start, stop = target_index[0], target_index[-1] + 1
        parent[start:stop] = sorted(parent[start:stop], key=keyformula)
#%% sort A0N
# Under every <A>, order the children whose tag starts with 'A' by tag name.
parpath = '//A'
ct1 = lambda x: x.tag[0] == 'A'
k1 = lambda x: x.tag
siblingsort(parpath, ct1, k1)

#%% sort K
# Order all children of every <K> under <A> by tag name.
kpath = '//A/K'
k2 = lambda x: x.tag
parentsort(kpath, k2)

#%% sort R
# Under every <A>, order the <R> children by their 'id' attribute
# (compared as strings).
ct2 = lambda x: x.tag == 'R'
k3 = lambda x: x.attrib['id']
siblingsort(parpath, ct2, k3)

#%% sort X0N
parentsort('//X', lambda x: x.tag)  #somehow not necessary
# Order the children of <X> whose tag starts with 'X' by tag name.
ct3 = lambda x: x.tag[0] == 'X'
k4 = lambda x: x.tag
siblingsort('//X', ct3, k4)

#%% pretty print xml
# print() as a call is valid on both Python 2 and 3 (the old print statement
# is a SyntaxError on Python 3). encoding='unicode' makes tostring return
# text instead of bytes, so Python 3 prints the XML rather than a b'...' repr.
print(etree.tostring(doc, pretty_print=True, encoding='unicode'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment