Created
February 17, 2011 04:01
-
-
Save mbklein/830949 to your computer and use it in GitHub Desktop.
Adds Nokogiri::XML::Node#semantically_equal?(node) to test XML documents for semantic equality
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'nokogiri' | |
module Nokogiri | |
module XML | |
# Two attribute nodes count as semantically equal only when both the
# attribute name and the attribute value are identical.
class Attribute
  # Compares this attribute with another one.
  #
  # node:: object to compare against; must respond to +name+ and +value+
  # opts:: comparison options; accepted for interface parity, not read here
  #
  # Returns true when name and value both match exactly, false otherwise.
  def compare_semantics(node, opts)
    # A differing name settles the answer without looking at the value.
    return false unless name == node.name

    value == node.value
  end
end
# CDATA sections are compared verbatim: every character counts, including
# leading, trailing, and internal whitespace.
class CDATA
  # Compares the raw text of this CDATA section with another node's text.
  #
  # node:: object to compare against; must respond to +text+
  # opts:: comparison options; accepted for interface parity, not read here
  #
  # Returns true only when both text strings are exactly equal.
  def compare_semantics(node, opts)
    own_text = text
    other_text = node.text
    own_text == other_text
  end
end
# Any other CharacterData node is compared on whitespace-normalized text:
# leading and trailing whitespace is stripped and each internal run of
# whitespace is collapsed to a single space before the strings are compared.
class CharacterData
  # Compares the normalized text of this node with another node's.
  #
  # node:: object to compare against; must respond to +text+
  # opts:: comparison options; accepted for interface parity, not read here
  #
  # Returns true when both normalized strings are equal.
  def compare_semantics(node, opts)
    normalize = lambda { |raw| raw.strip.gsub(/\s+/, ' ') }
    normalize.call(text) == normalize.call(node.text)
  end
end
# Documents are semantically equal if their root nodes are equal.
class Document
  # Tests whether this document is semantically equal to another one.
  #
  # node:: the object to compare against
  # opts:: comparison options, forwarded unchanged to the root comparison;
  #        :element_order => true requires child elements to appear in the
  #        same relative position
  #
  # Returns false when +node+ is not of exactly the same class. Two documents
  # that both lack a root are considered equal; a document without a root is
  # never equal to one that has a root. Otherwise returns the result of
  # comparing the two roots.
  def semantically_equal?(node, opts = { :element_order => false })
    return false unless node.class == self.class

    local_root = root
    remote_root = node.root
    # Without this guard a root-less receiver would raise NoMethodError on nil.
    return remote_root.nil? if local_root.nil?

    local_root.semantically_equal?(remote_root, opts)
  end
end
# Elements are semantically equal if they have the same name, and their child
# nodesets are equal (as defined by compare_children_semantically).
class Element
  # Compares this element with another one by name and then by children.
  #
  # node:: object to compare against; must respond to +name+
  # opts:: comparison options, passed through to the child comparison
  #
  # Returns false as soon as the names differ; otherwise returns the result
  # of compare_children_semantically.
  #
  # NOTE(review): only the name and the child comparison are consulted here;
  # attributes do not appear to be compared on this path -- confirm whether
  # that is intended.
  def compare_semantics(node, opts)
    # Both conditions must hold; the name check must not be discarded.
    name == node.name && compare_children_semantically(node, opts)
  end
end
# Node A is semantically equal to Node B if and only if:
# * Node A and Node B are of the same class
# * Node A and Node B are in the same namespace
# * Node A and Node B have the same number of child nodes
#   (excluding ProcessingInstructions, Comments and empty Text nodes)
# * Each child of Node A equals one child of Node B
# * If called with :element_order => true, child elements must be
#   in the same relative position in order to be considered equal
class Node
  # Tests whether this node is semantically equal to another node.
  #
  # node:: the object to compare against
  # opts:: comparison options; :element_order => true additionally requires
  #        child elements to sit at the same index among their siblings
  #
  # Returns false when the classes differ or the namespaces differ (including
  # the case where only one of the two nodes has a namespace); otherwise
  # returns the result of compare_semantics.
  def semantically_equal?(node, opts = { :element_order => false })
    return false unless node.class == self.class

    local_ns = namespace
    remote_ns = node.namespace
    if local_ns.nil? || remote_ns.nil?
      # A namespaced node never equals an un-namespaced one; checking this
      # first also avoids calling href on nil.
      return false unless local_ns.nil? && remote_ns.nil?
    elsif local_ns.href != remote_ns.href
      return false
    end

    compare_semantics(node, opts)
  end

  # Default class-specific comparison: a generic node is judged purely by
  # its children.
  def compare_semantics(node, opts)
    compare_children_semantically(node, opts)
  end

  # Compares the significant children of this node with those of +node+.
  #
  # Comments, processing instructions and whitespace-only nodes whose class
  # is exactly Text are ignored on both sides. Every remaining local child
  # must pair up with a distinct remote child that is semantically equal to
  # it under the same +opts+.
  #
  # Returns true when all children pair up and none are left over.
  def compare_children_semantically(node, opts)
    ignorable = lambda do |child|
      child.is_a?(Comment) ||
        child.is_a?(ProcessingInstruction) ||
        (child.class == Text && child.text.strip.empty?)
    end

    local_set = children.reject { |child| ignorable.call(child) }
    remote_set = node.children.reject { |child| ignorable.call(child) }
    return false unless local_set.length == remote_set.length

    local_set.each do |local_node|
      # opts is forwarded so that :element_order also applies to descendants.
      remote_node = remote_set.find do |candidate|
        local_node.semantically_equal?(candidate, opts)
      end
      return false if remote_node.nil?

      if opts[:element_order] && local_node.is_a?(Element)
        # Positions are measured among element siblings only.
        local_index = elements.index(local_node)
        remote_index = node.elements.index(remote_node)
        return false unless local_index == remote_index
      end

      # Each remote child may satisfy at most one local child.
      remote_set.delete(remote_node)
    end

    remote_set.length == 0
  end
end
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment