Created
August 11, 2009 09:19
-
-
Save karlbright/165718 to your computer and use it in GitHub Desktop.
The Stack...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems'
require 'open-uri'
require 'uri'
require 'nokogiri'
# Scrapers for comic distributors' online catalogs.
module Distributor
  # Mixin for page wrappers: downloads the document at a URL and keeps the
  # parsed result in @page for the including class to query.
  module Page
    # @param url [String] address of the page to download and parse
    # @raise [ArgumentError] when no URL is supplied
    def initialize(url = nil)
      raise ArgumentError, 'a page URL is required' if url.nil?

      # URI.open instead of Kernel#open: open-uri no longer hooks Kernel#open
      # on Ruby 3.0+, and Kernel#open would run a subprocess for strings that
      # start with "|".
      @page = Nokogiri::HTML(URI.open(url))
    end
  end

  # Client for the DC Universe comics catalog.
  class DCUniverse
    # Address of the catalog listing; product links are resolved against it.
    CatalogURL = 'http://www.dccomics.com/dcu/comics/'.freeze
    # Matches product URLs: the catalog path followed by a "?key=digits" query.
    ProductPattern = %r{/dcu/comics/(\?\w+=\d+)\z}.freeze

    attr_reader :catalog

    # Downloads the catalog page and exposes it through #catalog.
    def initialize
      @catalog = get_page(CatalogURL)
    end

    # Builds the page wrapper that corresponds to +url+.
    #
    # @param url [String] address to load
    # @return [Pages::Product] when +url+ matches ProductPattern
    # @return [Pages::Catalog] when +url+ is exactly CatalogURL
    # @return [nil] for any other URL
    def get_page(url)
      case url
      when ProductPattern then Pages::Product.new(url)
      when CatalogURL then Pages::Catalog.new(url)
      end
    end

    # Loads the product page that a catalog link points to.
    #
    # @param item [#[]] catalog link (as returned by Pages::Catalog#items)
    #   whose 'href' is looked up with item['href']
    # @return [Pages::Product, nil] result of #get_page for the resolved URL
    # @raise [ArgumentError] when the link has no href
    def product(item)
      # item['href'] yields the attribute's string value; the entries of
      # item.attributes are attribute objects, which String#+ rejects.
      href = item['href']
      raise ArgumentError, 'catalog item has no href' if href.nil?

      # URI.join resolves query-only, relative, and absolute hrefs alike.
      get_page(URI.join(CatalogURL, href.to_s).to_s)
    end

    # Wrappers around the individual kinds of pages on the site.
    module Pages
      # The catalog listing page.
      class Catalog
        include Distributor::Page

        # @return [Array] the links found under the accordion lists in
        #   #main_content
        def items
          @page.search('#main_content div.accordion ul li a').to_a
        end
      end

      # A single product page.
      class Product
        include Distributor::Page

        # @return [String, nil] stripped text of the #h2-div element, or nil
        #   when the page has no such element
        def title
          heading = @page.at('#h2-div')
          heading && heading.inner_text.strip
        end
      end
    end
  end
end
# Demo: download the catalog, follow its first listing, and print that
# product's title.
dc = Distributor::DCUniverse.new
# e.g. <a href="?cm=12435" name="/media/product/1/2/12435_120x180.jpg">BATMAN CONFIDENTIAL #32</a>
item = dc.catalog.items.first
# An empty listing would otherwise surface as an error inside #product.
abort 'No catalog items found' unless item

product = dc.product(item)
puts product.title # e.g. BATMAN CONFIDENTIAL #32
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment