Skip to content

Instantly share code, notes, and snippets.

@mbklein
Created February 17, 2011 04:01
Show Gist options
  • Save mbklein/830949 to your computer and use it in GitHub Desktop.
Save mbklein/830949 to your computer and use it in GitHub Desktop.
Adds Nokogiri::XML::Node#semantically_equal?(node) to test XML documents for semantic equality
require 'nokogiri'
module Nokogiri
module XML
# Two attribute nodes are semantically equal only when both the attribute
# name and the attribute value are identical.
# NOTE(review): Nokogiri's attribute node class appears to be named Attr, so
# confirm that this class is the one actually reached during comparison.
class Attribute
  # node - the attribute to compare against.
  # opts - comparison options; not consulted for attributes.
  # Returns whether the name and the value both match.
  def compare_semantics(node, opts)
    # A differing name settles the answer without looking at the value.
    return false unless name == node.name

    value == node.value
  end
end
# CDATA sections are compared verbatim: the two text strings must be
# identical, and leading, trailing and internal whitespace all count.
class CDATA
  # node - the CDATA node to compare against.
  # opts - comparison options; not consulted for CDATA.
  # Returns whether both nodes carry exactly the same text.
  def compare_semantics(node, opts)
    own_text = text
    other_text = node.text
    own_text == other_text
  end
end
# Every other kind of character data is compared after normalisation:
# leading and trailing whitespace is stripped and each internal run of
# whitespace is collapsed to one space before the strings are compared.
class CharacterData
  # node - the character-data node to compare against.
  # opts - comparison options; not consulted here.
  # Returns whether the two normalised text strings are identical.
  def compare_semantics(node, opts)
    normalize = lambda { |raw| raw.strip.gsub(/\s+/,' ') }
    normalize.call(text) == normalize.call(node.text)
  end
end
# Documents are semantically equal if they are of the same class and their
# root nodes are semantically equal. Two documents that both lack a root
# node are treated as equal; a document with a root never equals one without.
class Document
  # node - the document to compare against.
  # opts - comparison options, passed through unchanged to the root node
  #        comparison (:element_order => true enforces element ordering).
  # Returns true or false for the class and missing-root checks, otherwise
  # whatever the root node's semantically_equal? returns.
  def semantically_equal?(node, opts = { :element_order => false })
    return false unless node.class == self.class

    local_root = root
    remote_root = node.root
    # Without this guard a rootless receiver raises NoMethodError when
    # semantically_equal? is sent to nil.
    if local_root.nil? || remote_root.nil?
      return local_root.nil? && remote_root.nil?
    end

    local_root.semantically_equal?(remote_root, opts)
  end
end
# Elements are semantically equal if they have the same name and their child
# node sets are equal (as decided by compare_children_semantically).
# NOTE(review): attributes are only compared if Node#children yields them —
# confirm against Nokogiri's documentation.
class Element
  # node - the element to compare against.
  # opts - comparison options, forwarded to the child comparison.
  # Returns false when the names differ, otherwise the result of comparing
  # the two child node sets.
  def compare_semantics(node, opts)
    # The name test must be joined to the child test; evaluated as a
    # separate statement its result is thrown away and any two elements
    # with matching children compare equal.
    name == node.name && compare_children_semantically(node, opts)
  end
end
# Node A is semantically equal to Node B if and only if:
# * Node A and Node B are of the same class
# * Node A and Node B are in the same namespace (both without a namespace,
#   or both with namespaces whose hrefs match)
# * Node A and Node B have the same number of child nodes
#   (excluding ProcessingInstructions, Comments and empty Text nodes)
# * Each child of Node A equals one child of Node B
# * If called with :element_order => true, child elements must be
#   in the same relative position in order to be considered equal; this
#   applies at every depth of the tree
class Node
  # node - the node to compare against.
  # opts - comparison options; :element_order => true makes the position of
  #        child elements significant.
  # Returns false on a class or namespace mismatch, otherwise the result of
  # the class-specific compare_semantics.
  def semantically_equal?(node, opts = { :element_order => false })
    return false unless node.class == self.class

    local_ns = namespace
    remote_ns = node.namespace
    if local_ns.nil? || remote_ns.nil?
      # A namespaced node never equals an un-namespaced one. Checking for
      # nil first avoids calling href on nil when only one side has a
      # namespace.
      return false unless local_ns.nil? && remote_ns.nil?
    elsif local_ns.href != remote_ns.href
      return false
    end

    compare_semantics(node, opts)
  end

  # Default comparison for node classes that do not define their own:
  # two nodes match when their child node sets match.
  def compare_semantics(node, opts)
    compare_children_semantically(node, opts)
  end

  # Pairs every significant child of this node with a distinct, semantically
  # equal child of +node+.
  #
  # node - the node whose children are compared with this node's children.
  # opts - comparison options; opts[:element_order] is read here and the
  #        whole hash is passed down to each child comparison.
  # Returns true when every significant child found a partner and none of
  # the other node's significant children is left over, false otherwise.
  def compare_children_semantically(node, opts)
    # Comments, processing instructions and whitespace-only Text nodes do
    # not take part in the comparison. The exact class test keeps Text
    # subclasses out of the whitespace rule.
    ignorable = lambda do |child|
      child.is_a?(Comment) ||
        child.is_a?(ProcessingInstruction) ||
        (child.class == Text && child.text.strip.empty?)
    end
    local_set = children.reject { |child| ignorable.call(child) }
    remote_set = node.children.reject { |child| ignorable.call(child) }
    return false unless local_set.length == remote_set.length

    check_order = opts[:element_order]
    if check_order
      # Looked up once, since neither element list changes during the loop.
      local_elements = elements
      remote_elements = node.elements
    end

    local_set.each do |local_node|
      # opts must travel with the recursive call, otherwise nested elements
      # fall back to the default and are compared without regard to order.
      remote_node = remote_set.find { |candidate| local_node.semantically_equal?(candidate, opts) }
      return false if remote_node.nil?

      if check_order && local_node.is_a?(Element)
        return false unless local_elements.index(local_node) == remote_elements.index(remote_node)
      end
      # Each remote child may be used as a partner only once.
      remote_set.delete(remote_node)
    end
    remote_set.empty?
  end
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment