Created
May 5, 2010 17:31
-
-
Save rb2k/391134 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "benchmark"
require "open-uri"

require "hpricot"
require "nokogiri"
# Timings on my MacBook:
#   $ ruby speedtest.rb
#   uri took: 4.539837
#   hpricot took: 3.490182
#   nokogiri took: 6.273096
#
# Timings on an EC2 small instance:
#   uri took: 11.428387987
#   hpricot took: 8.071451074000001
#   nokogiri took: 12.722893389
# Download the page once up front; the resulting string is what gets parsed
# by the code below.
puts "getting data"
page_url = "http://www.reddit.com/"
# Kernel#open stopped accepting URLs in Ruby 3.0, while URI.open only exists
# on newer Rubies, so use whichever entry point open-uri provides here.
# The block form closes the stream when done, and #read keeps the body
# byte-for-byte (readlines + join("\n") doubled every line break).
html_string =
  if URI.respond_to?(:open)
    URI.open(page_url) { |io| io.read }
  else
    open(page_url) { |io| io.read }
  end
puts "done"
# Collects the href of every <a> element found via Nokogiri's CSS selector,
# reading each value through the node's attribute table and its #content.
#
# @param html [String] markup to parse
# @return [Array<String, nil>] distinct href values in first-seen order;
#   contains a single nil when at least one anchor has no href attribute
def extract_links_noko_css(html)
  hrefs = Nokogiri::HTML(html).css("a").map do |anchor|
    href_attr = anchor.attributes["href"]
    # A missing attribute is an expected case, so test for it explicitly
    # instead of raising NoMethodError and hiding every error behind
    # a blanket `rescue nil`.
    href_attr ? href_attr.content : nil
  end
  hrefs.uniq
end
# Same CSS-selector walk as extract_links_noko_css, but keeps whatever the
# attribute table holds under "href" without calling #content on it, so the
# result holds attribute objects rather than strings.
# NOTE(review): presumably this exists to time the lookup without the
# #content call -- confirm before relying on the returned values.
#
# @param html [String] markup to parse
# @return [Array] distinct "href" attribute entries in first-seen order;
#   nil stands in for anchors without an href attribute
def extract_links_noko_css_nocontent(html)
  # A lookup in the attribute table yields nil for a missing key, so the
  # former blanket `rescue nil` only served to hide unrelated errors.
  Nokogiri::HTML(html).css("a").map { |anchor| anchor.attributes["href"] }.uniq
end
# Collects href values with Nokogiri using an XPath query that only matches
# <a> elements carrying an href attribute.
#
# @param html [String] markup to parse
# @return [Array<String>] distinct href values in first-seen order
def extract_links_noko_xpath(html)
  Nokogiri::HTML(html).search("//a[@href]").map { |anchor| anchor["href"] }.uniq
end
# Collects href values with Nokogiri using an XPath query that matches every
# <a> element, whether or not it has an href attribute.
#
# @param html [String] markup to parse
# @return [Array<String, nil>] distinct href values in first-seen order;
#   contains a single nil when at least one anchor has no href attribute
def extract_links_noko_xpath_nohref(html)
  # Reading a missing attribute with [] already gives nil, so no rescue is
  # needed; dropping it also stops unrelated errors from being hidden.
  Nokogiri::HTML(html).search("//a").map { |anchor| anchor["href"] }.uniq
end
# Collects href values with Hpricot using an XPath query that only matches
# <a> elements carrying an href attribute.
#
# @param html [String] markup to parse
# @return [Array<String>] distinct href values in first-seen order
def extract_links_hpricot(html)
  Hpricot(html).search("//a[@href]").map { |anchor| anchor["href"] }.uniq
end
# Collects the href of every <a> element found via Hpricot's CSS-style
# selector, reading each value from the element's attribute table.
#
# @param html [String] markup to parse
# @return [Array<String, nil>] distinct href values in first-seen order;
#   contains a single nil when at least one anchor has no href attribute
def extract_links_hpricot_css(html)
  # Hpricot hands back attribute values as plain strings. The earlier
  # `.content rescue nil` (copied from the Nokogiri variant) therefore raised
  # NoMethodError on every anchor and turned every link into nil.
  Hpricot(html).search("a").map { |anchor| anchor.attributes["href"] }.uniq
end
# Collects href values with Hpricot using an XPath query that matches every
# <a> element, whether or not it has an href attribute.
#
# @param html [String] markup to parse
# @return [Array] distinct results of reading "href" from each anchor, in
#   first-seen order
def extract_links_hpricot_nohref(html)
  Hpricot(html).search("//a").map { |anchor| anchor["href"] }.uniq
end
# Benchmark driver: runs each extractor repeatedly over the same downloaded
# page and prints the elapsed seconds per variant, in the order listed.
runs_per_variant = 250

# Report label paired with the extractor it times.
variants = [
  ["hpricot (xpath)",          method(:extract_links_hpricot)],
  ["hpricot (xpath, no href)", method(:extract_links_hpricot_nohref)],
  ["hpricot (css)",            method(:extract_links_hpricot_css)],
  ["nokogiri (xpath)",         method(:extract_links_noko_xpath)],
  ["nokogiri (css_nocontent)", method(:extract_links_noko_css_nocontent)],
  ["nokogiri (css)",           method(:extract_links_noko_css)],
  ["nokogiri (xpath no href)", method(:extract_links_noko_xpath_nohref)]
]

variants.each do |label, extractor|
  # Benchmark.realtime returns the elapsed seconds for the block as a Float,
  # replacing the hand-rolled Time.now subtraction that was repeated for
  # every variant. The extractor's result is discarded; only time matters.
  took = Benchmark.realtime do
    runs_per_variant.times { extractor.call(html_string) }
  end
  puts "#{label} took: #{took}"
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
hpricot (xpath) took: 3.599083
hpricot (xpath, no href) took: 3.283622
hpricot (css) took: 4.996853
nokogiri (xpath) took: 4.169071
nokogiri (css_nocontent) took: 4.372877
nokogiri (css) took: 4.494918
nokogiri (xpath no href) took: 3.861592
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment