Skip to content

Instantly share code, notes, and snippets.

@hollanddd
Created December 6, 2012 20:11
Show Gist options
  • Save hollanddd/4227895 to your computer and use it in GitHub Desktop.
Save hollanddd/4227895 to your computer and use it in GitHub Desktop.
Scraping practice inside an ActiveRecord model
require 'open-uri'
# ActiveRecord model holding one scraped "deal" per wine site.
#
# Each +self.<site>+ class method downloads the page at +url+, pulls the site
# name, link, image source, wine name and price out of the markup with
# site-specific XPath/CSS selectors, and persists the result via +Deal.create+.
# +Deal.populate_deals+ wipes the table and runs every scraper in turn.
#
# The selectors are tied to each site's markup at the time of writing; when a
# selector matches nothing, +at_css+ returns nil and the following +.text+
# raises NoMethodError. +populate_deals+ rescues and reports such failures.
class Deal < ActiveRecord::Base
  attr_accessible :img_src, :price, :site_href, :site_name, :wine_name

  # Site URL => name of the class method that scrapes it, in run order.
  SCRAPERS = [
    ['http://www.wine.woot.com',            :wine_woot],
    ['http://www.cinderellawine.com',       :cinderella_wine],
    ['http://www.lastcallwines.com',        :last_call_wines],
    ['http://www.bacchusselections.com',    :bacchus_selections],
    ['http://www.thewinespies.com',         :wine_spies]
  ].freeze

  # Scrapes the page at +url+ using the wine.woot selectors and stores a Deal.
  #
  # @param url [String] base URL of the site
  # @return [Deal] the record returned by +Deal.create+
  def self.wine_woot(url)
    doc = fetch_document(url)
    data = doc.xpath('//*[@id="summary"]/div')
    site_name = doc.xpath('html/head/title').text
    # The href is appended to the base URL as-is (no separator is inserted).
    site_href = url + data.xpath('hgroup/a/@href').to_s
    img_src = doc.xpath('//*[@id="todays-deal"]/a/img/@src').to_s
    wine_name = data.at_css('h2.fn').text
    price = data.at_css('.price').text
    Deal.create(:site_name => site_name, :site_href => site_href, :img_src => img_src,
                :wine_name => wine_name, :price => price)
  end

  # Scrapes the page at +url+ using the cinderellawine selectors and stores a Deal.
  #
  # @param url [String] base URL of the site
  # @return [Deal] the record returned by +Deal.create+
  def self.cinderella_wine(url)
    doc = fetch_document(url)
    # Site name is the part of <title> before the first '-'.
    site_name = doc.xpath('html/head/title').text.split('-').first.strip
    site_href = "#{url}"
    img_src = doc.xpath('//*[@id="bottle-shot"]/a/img/@src').to_s
    wine_name = doc.xpath('//*[@id="title"]/h2/a').text
    # Price is assembled from separate dollars and cents elements.
    price = "$" + doc.xpath('//*[@id="product-dollars"]').text.strip + "." +
            doc.xpath('//*[@id="product-cents"]').text.strip
    Deal.create(:site_name => site_name, :site_href => site_href, :img_src => img_src,
                :wine_name => wine_name, :price => price)
  end

  # Scrapes the page at +url+ using the lastcallwines selectors and stores a Deal.
  #
  # @param url [String] base URL of the site
  # @return [Deal] the record returned by +Deal.create+
  def self.last_call_wines(url)
    doc = fetch_document(url)
    # Site name is the part of <title> after the last '|'.
    site_name = doc.xpath('html/head/title').text.split('|').last.strip
    site_href = url
    img_src = doc.xpath('//*[@id="divProductPic"]/img/@src').to_s
    wine_name = doc.at_css('h3.ProductNameText2').text
    price = doc.at_css('td.SalePrice/div.price').text.strip
    Deal.create(:site_name => site_name, :site_href => site_href, :img_src => img_src,
                :wine_name => wine_name, :price => price)
  end

  # Scrapes the page at +url+ using the bacchusselections selectors and stores a Deal.
  #
  # @param url [String] base URL of the site
  # @return [Deal] the record returned by +Deal.create+
  def self.bacchus_selections(url)
    doc = fetch_document(url)
    # Site name is the part of <title> after the last '-'.
    site_name = doc.xpath('html/head/title').text.split('-').last.strip
    site_href = url
    # Image src has its query string dropped and is appended to the base URL.
    img_src = url + doc.xpath('//*[@id="content"]/div[1]/div[2]/a/img/@src').to_s.split('?').first
    wine_name = doc.xpath('//*[@id="content"]/div[1]/div[1]/h2/a').text
    price = doc.xpath('//*[@id="content"]/div[1]/div[3]/p[1]').text.strip
    Deal.create(:site_name => site_name, :site_href => site_href, :img_src => img_src,
                :wine_name => wine_name, :price => price)
  end

  # Scrapes the page at +url+ using the thewinespies selectors and stores a Deal.
  #
  # @param url [String] base URL of the site
  # @return [Deal] the record returned by +Deal.create+
  def self.wine_spies(url)
    doc = fetch_document(url)
    # Site name is the part of <title> before the first '-'.
    site_name = doc.xpath('html/head/title').text.split('-').first.strip
    site_href = url
    # Image src has its query string dropped and is appended to the base URL.
    img_src = url + doc.xpath('//*[@id="wine-thumb"]/@src').to_s.split('?').first
    # split.join collapses runs of whitespace inside the name to single spaces.
    wine_name = doc.at_css('h2#wine-name').text.strip.split.join(" ")
    price = doc.at_css('td.our-price/div.inner').text.strip
    Deal.create(:site_name => site_name, :site_href => site_href, :img_src => img_src,
                :wine_name => wine_name, :price => price)
  end

  # Deletes every existing Deal, then runs each scraper in SCRAPERS order.
  #
  # A failure in one scraper (network error, changed markup, ...) is reported
  # on stdout and does not stop the remaining scrapers. Note that the table is
  # emptied before any scraping happens, so a failed site leaves no row behind.
  #
  # @return [Array<String>] the site URLs that were attempted
  def self.populate_deals
    Deal.destroy_all
    SCRAPERS.map do |site_url, scraper|
      begin
        public_send(scraper, site_url)
      rescue StandardError => e
        # Include the error so network failures and selector misses can be told apart.
        puts "#{site_url} had problems retrieving data (#{e.class}: #{e.message})"
      end
      site_url
    end
  end

  # Downloads +url+ and parses the response body as HTML.
  #
  # Uses URI#open (provided by open-uri) instead of Kernel#open: Kernel#open no
  # longer accepts URLs on Ruby 3.0+ and would run a shell command for strings
  # starting with "|". The block form closes the downloaded IO once parsed.
  #
  # @param url [String] an http(s) URL
  # @return [Nokogiri::HTML::Document]
  def self.fetch_document(url)
    URI.parse(url).open { |page| Nokogiri::HTML(page) }
  end
  private_class_method :fetch_document
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment