Created
December 6, 2012 20:11
-
-
Save hollanddd/4227895 to your computer and use it in GitHub Desktop.
Scraping practice inside an ActiveRecord model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri' | |
# A wine deal scraped from one of several daily-deal web sites.
#
# Each site has its own class-level scraper (+wine_woot+, +cinderella_wine+,
# ...) that downloads the page at the given URL, pulls the site name, link,
# image source, wine name and price out of the HTML with Nokogiri, and stores
# them through +create+. +populate_deals+ runs all of the scrapers in turn.
class Deal < ActiveRecord::Base
  attr_accessible :img_src, :price, :site_href, :site_name, :wine_name

  # Home page of each supported site paired with the class method that scrapes
  # it. An array of pairs (rather than a Hash) keeps the processing order
  # explicit.
  SCRAPERS = [
    ['http://www.wine.woot.com',          :wine_woot],
    ['http://www.cinderellawine.com',     :cinderella_wine],
    ['http://www.lastcallwines.com',      :last_call_wines],
    ['http://www.bacchusselections.com',  :bacchus_selections],
    ['http://www.thewinespies.com',       :wine_spies]
  ].freeze

  # Scrapes the deal shown at +url+ using the wine.woot.com page layout and
  # stores it.
  #
  # @param url [String] page to fetch; also used as the prefix of the deal link
  # @return [Deal] the result of +create+
  # @raise [StandardError] on download failure or when an expected node is
  #   missing (+at_css+ returns nil and +.text+ then raises NoMethodError)
  def self.wine_woot(url)
    doc  = fetch_document(url)
    data = doc.xpath('//*[@id="summary"]/div')

    create(
      :site_name => doc.xpath('html/head/title').text,
      # The href found on the page is appended to the URL that was fetched.
      :site_href => url + data.xpath('hgroup/a/@href').to_s,
      :img_src   => doc.xpath('//*[@id="todays-deal"]/a/img/@src').to_s,
      :wine_name => data.at_css('h2.fn').text,
      :price     => data.at_css('.price').text
    )
  end

  # Scrapes the deal shown at +url+ using the cinderellawine.com page layout
  # and stores it.
  #
  # @param url [String] page to fetch; stored as the deal link
  # @return [Deal] the result of +create+
  # @raise [StandardError] on download failure or when the page title is empty
  def self.cinderella_wine(url)
    doc = fetch_document(url)

    # The price is split across two elements: whole dollars and cents.
    dollars = doc.xpath('//*[@id="product-dollars"]').text.strip
    cents   = doc.xpath('//*[@id="product-cents"]').text.strip

    create(
      # Site name is the part of <title> before the first '-'.
      :site_name => doc.xpath('html/head/title').text.split('-').first.strip,
      :site_href => url.to_s,
      :img_src   => doc.xpath('//*[@id="bottle-shot"]/a/img/@src').to_s,
      :wine_name => doc.xpath('//*[@id="title"]/h2/a').text,
      :price     => "$" + dollars + "." + cents
    )
  end

  # Scrapes the deal shown at +url+ using the lastcallwines.com page layout
  # and stores it.
  #
  # @param url [String] page to fetch; stored as the deal link
  # @return [Deal] the result of +create+
  # @raise [StandardError] on download failure or when an expected node is
  #   missing
  def self.last_call_wines(url)
    doc = fetch_document(url)

    create(
      # Site name is the part of <title> after the last '|'.
      :site_name => doc.xpath('html/head/title').text.split('|').last.strip,
      :site_href => url,
      :img_src   => doc.xpath('//*[@id="divProductPic"]/img/@src').to_s,
      :wine_name => doc.at_css('h3.ProductNameText2').text,
      :price     => doc.at_css('td.SalePrice/div.price').text.strip
    )
  end

  # Scrapes the deal shown at +url+ using the bacchusselections.com page
  # layout and stores it.
  #
  # @param url [String] page to fetch; stored as the deal link and used as the
  #   prefix of the image source
  # @return [Deal] the result of +create+
  # @raise [StandardError] on download failure or when the image or title
  #   cannot be found
  def self.bacchus_selections(url)
    doc = fetch_document(url)

    create(
      # Site name is the part of <title> after the last '-'.
      :site_name => doc.xpath('html/head/title').text.split('-').last.strip,
      :site_href => url,
      # Drop any query string from the image path before prefixing the URL.
      :img_src   => url + doc.xpath('//*[@id="content"]/div[1]/div[2]/a/img/@src').to_s.split('?').first,
      :wine_name => doc.xpath('//*[@id="content"]/div[1]/div[1]/h2/a').text,
      :price     => doc.xpath('//*[@id="content"]/div[1]/div[3]/p[1]').text.strip
    )
  end

  # Scrapes the deal shown at +url+ using the thewinespies.com page layout and
  # stores it.
  #
  # @param url [String] page to fetch; stored as the deal link and used as the
  #   prefix of the image source
  # @return [Deal] the result of +create+
  # @raise [StandardError] on download failure or when an expected node is
  #   missing
  def self.wine_spies(url)
    doc = fetch_document(url)

    create(
      # Site name is the part of <title> before the first '-'.
      :site_name => doc.xpath('html/head/title').text.split('-').first.strip,
      :site_href => url,
      # Drop any query string from the image path before prefixing the URL.
      :img_src   => url + doc.xpath('//*[@id="wine-thumb"]/@src').to_s.split('?').first,
      # Collapse runs of whitespace inside the heading to single spaces.
      :wine_name => doc.at_css('h2#wine-name').text.strip.split.join(" "),
      :price     => doc.at_css('td.our-price/div.inner').text.strip
    )
  end

  # Deletes every stored deal and re-scrapes all sites listed in SCRAPERS.
  #
  # A failure on one site is reported on stdout and does not stop the
  # remaining sites from being processed.
  #
  # NOTE: existing rows are destroyed before anything is fetched, so a site
  # that fails to scrape is left without a deal until the next successful run.
  #
  # @return [Array<String>] the URLs that were attempted
  def self.populate_deals
    destroy_all

    SCRAPERS.each do |url, scraper|
      begin
        # Method names come only from the SCRAPERS constant above.
        send(scraper, url)
      rescue StandardError => e
        puts "#{url} had problems retrieving data (#{e.class}: #{e.message})"
      end
    end

    SCRAPERS.map { |url, _scraper| url }
  end

  # Downloads +url+ and parses the response body as HTML.
  #
  # Goes through URI#open (mixed into HTTP/FTP URIs by open-uri) instead of
  # Kernel#open: Kernel#open treats a leading "|" as a command to spawn, and
  # on Ruby 3.0+ it no longer opens URLs at all. A value that does not parse
  # to an openable URI raises instead of touching the local file system.
  # The block form closes the response IO once parsing is done.
  #
  # @param url [String, #to_s] address of the page to download
  # @return [Nokogiri::HTML::Document]
  def self.fetch_document(url)
    URI.parse(url.to_s).open { |io| Nokogiri::HTML(io) }
  end
  private_class_method :fetch_document
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment