Skip to content

Instantly share code, notes, and snippets.

@ArionHardison
Created September 7, 2012 03:46
Show Gist options
  • Save ArionHardison/3662894 to your computer and use it in GitHub Desktop.
Save ArionHardison/3662894 to your computer and use it in GitHub Desktop.
namespace :get_menu do
  # Walks menupages.com starting at the home page: follows the first cuisine
  # link, then the first restaurant link, reads the restaurant's basic
  # details, prints its "other info" section and finally opens its menu tab.
  desc "Scraping restaurant/menu data from www.menupages.com"
  task :menu_pages => :environment do
    # Required lazily so the gem is only needed when this task actually runs.
    require 'mechanize'

    agent = Mechanize.new
    # Keep the returned page: the cuisine lookup below reads from it.
    page = agent.get("http://www.menupages.com/")

    # click on link dependent on type of cuisine
    cuisine_type_address = page.search("#list-by-cuisine a").first.attributes["href"].value
    cuisine_type_link = page.link_with(:href => cuisine_type_address)
    cuisine_type_link.click

    # click on restaurant
    restaurant_address = agent.current_page.search(".link").first.attributes["href"].value
    restaurant_link = agent.current_page.link_with(:href => restaurant_address)
    restaurant_link.click

    # main restaurant info
    # NOTE(review): these three values are collected but not used anywhere
    # in this task yet — presumably meant to be persisted later; confirm.
    category = agent.current_page.search(".category").text
    address = agent.current_page.search(".adr , .postal-code, .cross-street, .street-address, .locality").text
    phone_number = agent.current_page.search("#restaurant-info strong").text

    # profile & reviews - other restaurant info
    other_restaurant_info_categories = agent.current_page.search("#restaurant-other-info dl")
    # The block parameter is deliberately not called `category`, so it cannot
    # shadow the restaurant category read above.
    other_restaurant_info_categories.each do |info_category|
      puts info_category.css("dt").text.delete(":") # info category name
      puts info_category.css("dd").text.squish # info category values
    end

    # go to restaurant menu tab
    restaurant_menu_address = agent.current_page.search("#menu-tab a").first.attributes["href"].value
    restaurant_menu_link = agent.current_page.link_with(:href => restaurant_menu_address)
    restaurant_menu_link.click
  end

  # Placeholder task: currently only prints a marker string.
  desc "Scraping restaurant/menu data from foodspotting.com"
  task :foodspotting => :environment do
    puts "hw2"
  end

  # Aggregate task that runs both scrapers, in the order listed.
  task :all => [:menu_pages, :foodspotting]
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment