Skip to content

Instantly share code, notes, and snippets.

@nasser
Created May 21, 2012 17:41
Show Gist options
  • Save nasser/2763497 to your computer and use it in GitHub Desktop.
Save nasser/2763497 to your computer and use it in GitHub Desktop.
require "open-uri"
require "hpricot"
require "htmlentities"
require "json"
require "yaml"
# Scrapes the drinksmixer.com ingredient glossary (one index page per letter,
# a through z), visits every ingredient page linked from it, and writes an
# array of {name, count} records to out.json in the current directory.
Encoding.default_internal = Encoding.default_external = "UTF-8"

BASE_URL = "http://www.drinksmixer.com"
RECIPE_COUNT_PATTERN = /of\s*(\d+)\s*recipes/

# Downloads +url+ and returns it parsed as an Hpricot document.
#
# URI.open is used instead of a bare `open`: open-uri stopped patching
# Kernel#open for URLs in Ruby 3.0. The block form also closes the
# underlying handle once the body has been read.
def fetch_document(url)
  Hpricot(URI.open(url, &:read))
end

# Pulls the recipe total out of +text+ using RECIPE_COUNT_PATTERN.
#
# Returns the Integer captured by the last match, or nil when +text+ is empty
# or contains no match at all (the latter used to raise NoMethodError on nil).
def parse_recipe_count(text)
  matches = text.scan(RECIPE_COUNT_PATTERN)
  matches.empty? ? nil : matches.last.first.to_i
end

coder = HTMLEntities.new

ingredients = ("a".."z").flat_map do |letter|
  index_page = fetch_document("#{BASE_URL}/glossary/#{letter}/")

  index_page.search("div.pm:last > div").map do |ingr|
    ingr_name = coder.decode(ingr.search("b").inner_html)
    # The last anchor inside the entry is taken as the ingredient's own page.
    ingr_url = "#{BASE_URL}#{ingr.search("a").last["href"]}"
    recp_count = parse_recipe_count(fetch_document(ingr_url).search("div.fr4").inner_html)

    # Progress output, one line per ingredient as it is scraped.
    puts "#{ingr_name} : #{recp_count}"

    { name: ingr_name, count: recp_count }
  end
end

File.write("out.json", ingredients.to_json)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment