November 13, 2008 20:29 · November 17, 2008 18:25 · November 25, 2008 21:55 · December 4, 2008 22:11 · January 6, 2009 03:57 · February 5, 2009 01:24
 XML Document parsing benchmark
                               user     system      total        real
 hpricot:xml:doc           10.160000   0.950000  11.110000 ( 11.144462)
 hpricot2:xml:doc           0.950000   0.000000   0.950000 (  0.953266)
 nokogiri:compat:doc        0.220000   0.020000   0.240000 (  0.238401)
 nokogiri:xml:doc           0.170000   0.030000   0.200000 (  0.200283)

 XML XPath benchmarks (//status/text, //user/name)
                               user     system      total        real
 hpricot:xml:xpath          7.580000   1.150000   8.730000 (  8.728314)
 For an html snippet 2374 bytes long ...
                          user     system      total        real
 regex * 1000          0.160000   0.010000   0.170000 (  0.182207)
 nokogiri * 1000       1.440000   0.060000   1.500000 (  1.537546)
 hpricot * 1000        5.740000   0.650000   6.390000 (  6.401207)

 it took an average of 0.0015 seconds for Nokogiri to parse and operate on an HTML snippet 2374 bytes long
 it took an average of 0.0064 seconds for Hpricot to parse and operate on an HTML snippet 2374 bytes long

 For an html snippet 97517 bytes long ...
 require 'rubygems'
 require 'nokogiri'

 class Nokogiri::XML::Node
  def method_missing name, *args, &block
    if args.empty?
      list = xpath("//#{name}")
    elsif args.first.is_a? Hash
      hash = args.first
      if hash[:css]
 diff --git a/test/test_sanitizer.rb b/test/test_sanitizer.rb
 index 22a99ee..5b8e0ba 100644
 --- a/test/test_sanitizer.rb
 +++ b/test/test_sanitizer.rb
 @@ -38,7 +38,11 @@ class SanitizeTest < Test::Unit::TestCase
 #         xhtmloutput = htmloutput
 #         rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
       if WhiteList::VOID_ELEMENTS.include?(tag_name)
 +        if Nokogiri::LIBXML_VERSION >= "2.6.16"
 +          htmloutput = "<#{tag_name} title='1'/><p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
 # proposed dryopteris API
 # returned object with singleton method body()

 require 'rubygems'
 require 'dryopteris'

 # Sample markup carrying a <script> tag, used as input for the sanitizer demo.
 unsafe_markup = "<html><head></head><body>haxxored!<script src='http://haxxored.com'></script></body></html>"

 # Print whatever Dryopteris.sanitize returns for the sample markup.
 # Expected output, as noted in the original example:
 #   "<html><head></head><body>haxxored!&lt;script src="http://haxxored.com"/&gt;</body></html>"
 puts Dryopteris.sanitize(unsafe_markup)
 # bash function "awkp" for grabbing fields from output
 # e.g., "ps -ef | fgrep mongrel | awkp 2 | xargs kill"
 #
 # usage: awkp FIELD
 #   FIELD is spliced into the awk program right after a "$", so a field
 #   number (2) or an awk expression such as NF both work.
 #   Reads stdin and prints the selected field of every line.
 #   Returns 2 and prints a usage line on stderr when FIELD is missing.

 function awkp {
    # Keep the field in a function-local variable so that calling awkp
    # outside a pipeline does not leave a stray global "narg" behind.
    local narg=$1
    if [ -z "$narg" ]; then
        echo "usage: awkp FIELD" >&2
        return 2
    fi
    # \$ keeps the dollar sign literal for awk; ${narg} is expanded by bash.
    awk "{print \$${narg}}"
 }
 require 'ffi'

 module Nokogiri
  module LibXML

    extend FFI::Library

    ffi_lib "/usr/lib/libxml2.so"

    callback :start_document_sax_func, [:pointer], :void
 #! /usr/bin/ruby
 #
 # memory leak?
 #
 require 'nokogiri'

 1000.times do
  doc = Nokogiri::XML("<root><item>1</item></root>")
  doc.at('item').remove
  putc "."
 # in response to El_Matador, one way to search for regular expressions using Nokogiri

 require 'rubygems'
 require 'nokogiri'

 value = Nokogiri::HTML.parse(<<-HTML_END)
  "<html>
    <body>
      <p id='para-1'>A</p>
      <p id='para-22'>B</p>
 #!/usr/local/bin/ruby
 require 'rubygems'
 require 'twitter'
 require 'open-uri'
 require 'nokogiri'

 # Settings for the follower-count script.
 # Account whose profile page is addressed by URL below.
 TARGET_USER = 'aplusk'
 # Numeric target; how it is compared is not visible in this excerpt.
 TARGET_COUNT = 1_000_000
 # Profile page address derived from TARGET_USER.
 URL = 'http://twitter.com/' + TARGET_USER
 # NOTE(review): looks like a CSS id selector for the follower count element;
 # confirm against the code that uses it.
 ID = '#follower_count'
	XML Document parsing benchmark
	user system total real
	hpricot:xml:doc 10.160000 0.950000 11.110000 ( 11.144462)
	hpricot2:xml:doc 0.950000 0.000000 0.950000 ( 0.953266)
	nokogiri:compat:doc 0.220000 0.020000 0.240000 ( 0.238401)
	nokogiri:xml:doc 0.170000 0.030000 0.200000 ( 0.200283)

	XML XPath benchmarks (//status/text, //user/name)
	user system total real
	hpricot:xml:xpath 7.580000 1.150000 8.730000 ( 8.728314)
	For an html snippet 2374 bytes long ...
	user system total real
	regex * 1000 0.160000 0.010000 0.170000 ( 0.182207)
	nokogiri * 1000 1.440000 0.060000 1.500000 ( 1.537546)
	hpricot * 1000 5.740000 0.650000 6.390000 ( 6.401207)

	it took an average of 0.0015 seconds for Nokogiri to parse and operate on an HTML snippet 2374 bytes long
	it took an average of 0.0064 seconds for Hpricot to parse and operate on an HTML snippet 2374 bytes long

	For an html snippet 97517 bytes long ...
	require 'rubygems'
	require 'nokogiri'

	class Nokogiri::XML::Node
	def method_missing name, *args, &block
	if args.empty?
	list = xpath("//#{name}")
	elsif args.first.is_a? Hash
	hash = args.first
	if hash[:css]
	diff --git a/test/test_sanitizer.rb b/test/test_sanitizer.rb
	index 22a99ee..5b8e0ba 100644
	--- a/test/test_sanitizer.rb
	+++ b/test/test_sanitizer.rb
	@@ -38,7 +38,11 @@ class SanitizeTest < Test::Unit::TestCase
	# xhtmloutput = htmloutput
	# rexmloutput = "<image title='1'>foo <bad>bar</bad> baz</image>"
	if WhiteList::VOID_ELEMENTS.include?(tag_name)
	+ if Nokogiri::LIBXML_VERSION >= "2.6.16"
	+ htmloutput = "<#{tag_name} title='1'/><p>foo <bad>bar</bad> baz</p>"
	# proposed dryopteris API
	# returned object with singleton method body()

	require 'rubygems'
	require 'dryopteris'

	# Sample markup carrying a <script> tag, used as input for the sanitizer demo.
	unsafe_markup = "<html><head></head><body>haxxored!<script src='http://haxxored.com'></script></body></html>"

	# Print whatever Dryopteris.sanitize returns for the sample markup.
	# NOTE(review): the script tag is presumably meant to come back
	# entity-escaped, i.e.
	#   "<html><head></head><body>haxxored!&lt;script src="http://haxxored.com"/&gt;</body></html>"
	# confirm against the sanitizer's actual output.
	puts Dryopteris.sanitize(unsafe_markup)
	# bash function "awkp" for grabbing fields from output
	# e.g., "ps -ef | fgrep mongrel | awkp 2 | xargs kill"
	#
	# usage: awkp FIELD
	#   FIELD is spliced into the awk program right after a "$", so a field
	#   number (2) or an awk expression such as NF both work.
	#   Reads stdin and prints the selected field of every line.
	#   Returns 2 and prints a usage line on stderr when FIELD is missing.

	function awkp {
	    # Keep the field in a function-local variable so that calling awkp
	    # outside a pipeline does not leave a stray global "narg" behind.
	    local narg=$1
	    if [ -z "$narg" ]; then
	        echo "usage: awkp FIELD" >&2
	        return 2
	    fi
	    # \$ keeps the dollar sign literal for awk; ${narg} is expanded by bash.
	    awk "{print \$${narg}}"
	}
	require 'ffi'

	module Nokogiri
	module LibXML

	extend FFI::Library

	ffi_lib "/usr/lib/libxml2.so"

	callback :start_document_sax_func, [:pointer], :void
	#! /usr/bin/ruby
	#
	# memory leak?
	#
	require 'nokogiri'

	1000.times do
	doc = Nokogiri::XML("<root><item>1</item></root>")
	doc.at('item').remove
	putc "."
	# in response to El_Matador, one way to search for regular expressions using Nokogiri

	require 'rubygems'
	require 'nokogiri'

	value = Nokogiri::HTML.parse(<<-HTML_END)
	"<html>
	<body>
	<p id='para-1'>A</p>
	<p id='para-22'>B</p>
	#!/usr/local/bin/ruby
	require 'rubygems'
	require 'twitter'
	require 'open-uri'
	require 'nokogiri'

	# Settings for the follower-count script.
	# Account whose profile page is addressed by URL below.
	TARGET_USER = 'aplusk'
	# Numeric target; how it is compared is not visible in this excerpt.
	TARGET_COUNT = 1_000_000
	# Profile page address derived from TARGET_USER.
	URL = 'http://twitter.com/' + TARGET_USER
	# NOTE(review): looks like a CSS id selector for the follower count element;
	# confirm against the code that uses it.
	ID = '#follower_count'