Skip to content

Instantly share code, notes, and snippets.

@znz
Last active December 25, 2015 18:19
Show Gist options
  • Save znz/7019382 to your computer and use it in GitHub Desktop.
Save znz/7019382 to your computer and use it in GitHub Desktop.
Nokogiri::XML::SAX::Parser example
% ruby sax-example.rb
ruby 2.0.0p247 (2013-06-27 revision 41674) [x86_64-darwin11.4.2]
Nokogiri::VERSION = 1.6.0
Nokogiri::LIBXML_VERSION = 2.8.0
[:start_document]
[:start_element_namespace, "root", [], nil, nil, []]
[:characters, "\n "]
[:start_element_namespace, "foo", [], nil, nil, []]
[:characters, "foo"]
[:cdata_block, "cdata!\n<foo><bar>\n"]
[:start_element_namespace, "bar", [], nil, nil, []]
[:end_element_namespace, "bar", nil, nil]
[:end_element_namespace_texts, "bar", ""]
[:characters, "bar\n "]
[:characters, "\n"]
[:error, "PCDATA invalid Char value 11\n"]
[:characters, "\n"]
[:error, "Unregistered error message\n"]
[:error, "PCDATA invalid Char value 11\n"]
[:error, "Sequence ']]>' not allowed in content\n"]
[:error, "Sequence ']]>' not allowed in content\n"]
[:error, "internal error"]
[:error, "Extra content at the end of the document\n"]
[:end_document]
#!/usr/bin/env ruby
require 'nokogiri'
# Print the interpreter and library versions first, so the callback trace
# that follows can be tied to the exact Ruby/Nokogiri/libxml2 combination
# that produced it. Kernel#puts writes each argument on its own line.
puts RUBY_DESCRIPTION,
     "Nokogiri::VERSION = #{Nokogiri::VERSION}",
     "Nokogiri::LIBXML_VERSION = #{Nokogiri::LIBXML_VERSION}"
# SAX handler that traces parser callbacks to stdout and accumulates the
# text content of every element that is currently open.
#
# Each traced callback prints an array of the form [event, *arguments] with
# Kernel#p. Text is collected on a stack of string buffers: opening an
# element pushes a fresh buffer, character data and CDATA are appended to
# *every* buffer on the stack (so an element also collects the text of its
# descendants), and closing an element pops its buffer and prints it as an
# :end_element_namespace_texts event.
class MyDocument < Nokogiri::XML::SAX::Document
  # Per-event switch for the trace output, keyed by the event symbol that
  # appears first in each printed array. Missing keys default to true, so
  # every event is printed unless its key is explicitly set to false, e.g.
  #   MyDocument::DEBUG_PRINT[:characters] = false
  # The hash is intentionally left unfrozen so callers can toggle events.
  DEBUG_PRINT = Hash.new(true)
  #DEBUG_PRINT[:fields_data] = true

  # Trace the XML declaration.
  def xmldecl(version, encoding, standalone)
    debug_print(:xmldecl, version, encoding, standalone)
  end

  # Trace the start of the document and reset the text-buffer stack.
  def start_document
    debug_print(:start_document)
    @text_stack = []
  end

  # Trace the end of the document.
  def end_document
    debug_print(:end_document)
  end

  # Trace an opening tag and push an empty buffer that will collect the
  # text seen until the matching end tag.
  def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
    debug_print(:start_element_namespace, name, attrs, prefix, uri, ns)
    @text_stack.push ""
  end

  # Trace a closing tag, then pop the buffer pushed by the matching
  # start_element_namespace call and trace the text it accumulated.
  def end_element_namespace(name, prefix = nil, uri = nil)
    debug_print(:end_element_namespace, name, prefix, uri)
    texts = @text_stack.pop
    debug_print(:end_element_namespace_texts, name, texts)
  end

  # Trace a run of character data and add it to every open element's buffer.
  def characters(string)
    debug_print(:characters, string)
    append_text(string)
  end

  # Trace a comment. Comments are not added to the text buffers.
  def comment(string)
    debug_print(:comment, string)
  end

  # Trace a parser warning.
  def warning(string)
    debug_print(:warning, string)
  end

  # Trace a parser error.
  def error(string)
    debug_print(:error, string)
  end

  # Trace a CDATA section and add its content to every open element's
  # buffer, exactly like ordinary character data.
  def cdata_block(string)
    debug_print(:cdata_block, string)
    append_text(string)
  end

  private

  # Print [event, *fields] unless DEBUG_PRINT[event] has been switched off.
  # Routing every callback through this helper makes the switch apply
  # uniformly; with the default (true) the output is unchanged.
  def debug_print(event, *fields)
    p([event, *fields]) if DEBUG_PRINT[event]
  end

  # Append string to the buffer of every element that is currently open.
  def append_text(string)
    @text_stack.each { |buffer| buffer << string }
  end
end
# Sample input for the trace. The heredoc is interpolating, so each \v below
# becomes a vertical-tab character (char value 11) in the parsed string; it
# appears once in plain content and once inside a CDATA section.
xml = <<-XML
<root>
<foo>foo<![CDATA[cdata!
<foo><bar>
]]><bar/>bar
<?some-pi data?>
\v
<![CDATA[
\v
]]>
</foo>
</root>
XML

# Run the sample through a SAX parser whose callbacks go to the tracing
# handler; every callback prints as parsing proceeds.
handler = MyDocument.new
parser = Nokogiri::XML::SAX::Parser.new(handler)
parser.parse(xml)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment