Created
July 2, 2009 19:10
-
-
Save flazz/139649 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demonstrates how to extract schema locations and stylesheets from an XML document.
# Also shows how to resolve namespaces against predetermined schema implementations.
# references used: | |
# http://libxml.rubyforge.org/rdoc/index.html | |
# http://www.w3.org/TR/xml-stylesheet/ | |
require 'libxml' | |
require 'enumerator' | |
include LibXML | |
# Expect the path to an XML file as the first command-line argument.
f = ARGV[0]
# Stop with a usage message instead of handing nil to the parser.
abort "usage: #{File.basename($PROGRAM_NAME)} FILE.xml" unless f
# Parse the file into a document; the sections below query it with XPath.
doc = XML::Parser.file(f).parse
# Extract the stylesheet links.
#
# The content of an xml-stylesheet processing instruction is a list of
# "pseudo-attributes" (href="..." type="text/xsl" ...). Their values may be
# quoted with either double or single quotes, and whitespace is allowed around
# the '=' sign, so both forms are accepted here.
#
# Returns the value of the pseudo-attribute +name+ found in +content+, or nil
# when it is absent or empty.
pseudo_attribute = lambda do |content, name|
  match = content.match(/\b#{Regexp.escape(name)}\s*=\s*(?:"([^"]+)"|'([^']+)')/)
  match && (match[1] || match[2])
end

# Collect the href of every xml-stylesheet instruction in the document.
# Raises RuntimeError when an instruction has no href, or when its type is
# anything other than text/xsl.
stylesheets = doc.find("//processing-instruction()[name()='xml-stylesheet']").map do |pi|
  reference = pseudo_attribute.call(pi.content, 'href')
  raise 'no href in xml-stylesheet processing instruction' unless reference

  unless pseudo_attribute.call(pi.content, 'type') == 'text/xsl'
    raise 'xml-stylesheet processing instruction is not of type xsl'
  end

  reference
end

puts "style sheets used:"
puts stylesheets
puts
# Gather the namespace URI (href) of every namespace reported by every element
# in the document, keeping the first occurrence of each URI.
namespaces = []
doc.find("//*").each do |element|
  element.namespaces.each { |namespace| namespaces << namespace.href }
end
# Drop duplicates in place; the return value is ignored because uniq! returns
# nil when nothing was removed.
namespaces.uniq!

puts "namespaces used:"
puts namespaces
puts
# Build a namespace => schema URL map from every xsi:schemaLocation attribute.
# The attribute value is a whitespace-separated list of alternating namespace
# and location tokens. A namespace declared more than once keeps the location
# reached last.
slocs = {}
xsi_namespace = { 'xsi' => 'http://www.w3.org/2001/XMLSchema-instance' }
doc.find('//@xsi:schemaLocation', xsi_namespace).each do |attribute|
  # split with no arguments already ignores leading and trailing whitespace.
  tokens = attribute.value.split
  tokens.each_slice(2) do |pair|
    namespace_uri, location = pair
    slocs[namespace_uri] = location
  end
end

puts "schema locations declared:"
slocs.each_pair { |namespace_uri, location| puts "#{namespace_uri} - #{location}" }
puts
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment