Created
January 5, 2016 09:05
-
-
Save Fifan31/39296e124e14b7f7153f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lxml.etree as le | |
from operator import attrgetter | |
import os | |
import tempfile | |
import difflib | |
def sortbyid(elem):
    '''Sort key: the numeric value of an element's 'id' attribute.

    Returns int(id) when the element has a non-empty 'id' attribute that
    parses as an integer, and 0 in every other case (attribute missing,
    empty, or not an integer).
    '''
    raw_id = elem.get('id')
    # Missing or empty id: nothing to convert
    if not raw_id:
        return 0
    try:
        numeric_id = int(raw_id)
    except ValueError:
        # Non-numeric ids all share the same key
        numeric_id = 0
    return numeric_id
def sortbytext(elem):
    '''Sort key: the text content of an element.

    Returns elem.text, or the empty string when the element has no text
    (None or ''), so that the key is always a string.
    '''
    return elem.text or ''
def sortAttrs(item, sorteditem):
    '''Copy the attributes of item onto sorteditem in alphabetical key order.

    item is only read (via keys() and get()); every attribute is written
    to sorteditem with set(), one key at a time, in sorted order.
    '''
    for name in sorted(item.keys()):
        value = item.get(name)
        sorteditem.set(name, value)
def sortElements(items, newroot):
    ''' Function to sort XML elements
    The sorted elements will be added as children of the provided newroot
    This is a recursive function, and will be called on each of the children
    of items.

    items is an iterable of lxml elements (typically list(parent)); the
    originals are not modified. Comments, processing instructions and
    entities are skipped: they are not copied to newroot.
    '''
    # Comments, processing instructions and entity references expose a
    # factory function (not a string) as their tag. They cannot be ordered
    # against element names and cannot be passed to le.Element(), so they
    # are left out of the sorted copy instead of raising a TypeError.
    special_tags = (le.Comment, le.ProcessingInstruction, le.Entity)
    elements = [item for item in items if item.tag not in special_tags]

    # Sort by element name, then by text contents, then by the numeric
    # value of the 'id' attribute. The sort is stable, so elements that
    # agree on all three keep their original relative order.
    elements.sort(
        key=lambda elem: (elem.tag, sortbytext(elem), sortbyid(elem)))

    # Once sorted, build a sorted copy of each of the items
    for item in elements:
        # Create a new item to represent the sorted version of this item;
        # whitespace-only text (indentation) is not copied
        newitem = le.Element(item.tag)
        if item.text and not item.text.isspace():
            newitem.text = item.text
        # Copy the attributes (sorted by key) to the new item
        sortAttrs(item, newitem)
        # Copy the children of item (sorted) to the new item
        sortElements(list(item), newitem)
        # Append this sorted item to the sorted root
        newroot.append(newitem)
def sortFile(fileobj):
    ''' Function to sort the provided XML file
    The file at fileobj['filename'] will be left untouched
    A new sorted copy of it will be created at fileobj['tmpfilename']

    fileobj is a dict with the keys 'filename' and 'tmpfilename'.
    '''
    # Open in binary mode so the XML parser sees the raw bytes and can
    # honour the document's own encoding declaration, instead of having
    # Python decode the file with the locale's default encoding first.
    with open(fileobj['filename'], 'rb') as original:
        # parse the XML file and get a pointer to the top
        xmlroot = le.parse(original).getroot()

    # create a new XML element that will be the top of
    # the sorted copy of the XML file
    newxmlroot = le.Element(xmlroot.tag)

    # create the sorted copy of the XML file
    sortAttrs(xmlroot, newxmlroot)
    sortElements(list(xmlroot), newxmlroot)

    # write the sorted XML file to the temp file
    newtree = le.ElementTree(newxmlroot)
    with open(fileobj['tmpfilename'], 'wb') as newfile:
        newtree.write(newfile, pretty_print=True)
def createFileObj(prefix, name):
    ''' Prepares the location of the temporary file that will be created by xmldiff

    Returns a dict with two keys:
    "filename" is the absolute path of name
    "tmpfilename" is the path of a newly created, empty temporary file whose
    name starts with prefix and ends with the base name of name

    The caller is responsible for removing the temporary file.
    '''
    # mkstemp creates the file and leaves it in place, so the returned path
    # stays reserved. Taking only the .name of a NamedTemporaryFile deletes
    # the file as soon as the object is finalised, leaving a bare name that
    # another process could claim.
    handle, tmpfilename = tempfile.mkstemp(suffix=os.path.basename(name),
                                           prefix=prefix)
    # Only the path is needed here; the file is reopened by path later
    os.close(handle)
    return {
        "filename": os.path.abspath(name),
        "tmpfilename": tmpfilename,
    }
def compare(expected, actual):
    ''' Compares two XML files after sorting their elements and attributes

    expected and actual are paths to the XML files to compare. Each file is
    sorted into a temporary copy, and the two copies are compared line by
    line. The temporary copies are removed before returning, including when
    sorting or reading fails.

    Returns the generator produced by difflib.unified_diff, labelled with
    the absolute paths of the two original files.
    '''
    filefrom = createFileObj("from", expected)
    fileto = createFileObj("to", actual)
    try:
        # sort each of the specified files
        sortFile(filefrom)
        sortFile(fileto)
        # Read everything up front and close the files before they are
        # removed; deleting a file that is still open fails on Windows
        with open(filefrom['tmpfilename'], mode='r') as expFile:
            expectedLines = expFile.readlines()
        with open(fileto['tmpfilename'], mode='r') as actFile:
            actualLines = actFile.readlines()
    finally:
        for tmp in (filefrom, fileto):
            try:
                os.remove(tmp['tmpfilename'])
            except OSError:
                # Best-effort cleanup: the file may never have been
                # created if sorting failed before it was written
                pass
    return difflib.unified_diff(expectedLines, actualLines,
                                filefrom['filename'], fileto['filename'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment