Created
June 19, 2009 04:31
-
-
Save jdhuntington/132411 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# Add books from http://sivers.org/book to a CouchDB database.
# TODO: parse the author out of the 'title' field.
require 'rubygems' | |
require 'couchrest' | |
require 'nokogiri' | |
require 'open-uri' | |
require 'clip' | |
# Command-line options, parsed with Clip. Both value options carry a default,
# so the script can be run with no arguments at all.
options = Clip do |parser|
  # Where the scraped books are stored.
  parser.optional('d', 'database', :desc => 'URI of database', :default => 'http://localhost:5984/books')
  # The page the books are scraped from.
  parser.optional('s', 'source', :desc => 'Data source', :default => 'http://sivers.org/book')
  # Boolean switch, takes no value.
  parser.flag('v', 'verbose', :desc => 'Make it chatty')
end
# Return the inner HTML of the first element matching +selector+.
#
# noko_source - any object responding to +css(selector)+ with a list of
#               elements (here: a Nokogiri document or node).
# selector    - CSS selector string.
#
# Returns whatever +inner_html+ of the first matching element returns.
# Raises RuntimeError naming the selector when nothing matches, instead of
# failing with an opaque NoMethodError on nil.
def get_content(noko_source, selector)
  element = noko_source.css(selector).first
  raise "no element matches selector #{selector.inspect}" unless element

  element.inner_html
end
# One book entry scraped from a single book element of the source page.
#
# Depends on the file-level +get_content(noko_source, selector)+ helper to
# read the inner HTML of the first element matching a CSS selector.
class Book
  # Patterns for the fields embedded in the "p.small" metadata line.
  # Sample p.small string: ISBN: 1416541993 READ: 2009-04-03 RATING: 10/10
  ISBN_PATTERN = /ISBN:\ (\w+)/
  # \w+ stops at the slash, so "10/10" yields "10".
  RATING_PATTERN = /RATING:\ (\w+)/

  # noko_source - the parsed element for this book (passed to get_content).
  # source      - identifier of where the data came from; stored as-is.
  #
  # Raises RuntimeError if the metadata line lacks an ISBN or a rating.
  def initialize(noko_source, source)
    @title = get_content(noko_source, "h3 a")
    @source = source
    # The metadata line holds both ISBN and rating; look it up only once.
    metadata = get_content(noko_source, "p.small")
    @isbn = retrieve_isbn(metadata)
    @derek_sivers_rating = retrieve_rating(metadata)
    @derek_sivers_summary = get_content(noko_source, ":nth-child(4)")
  end

  # Extract the ISBN (the word characters following "ISBN: ") from +source+.
  # Raises RuntimeError, with the inspected input as message, when absent.
  def retrieve_isbn(source)
    extract_field(source, ISBN_PATTERN)
  end

  # Extract the rating (the word characters following "RATING: ") from
  # +source+. Raises RuntimeError, with the inspected input as message,
  # when absent.
  def retrieve_rating(source)
    extract_field(source, RATING_PATTERN)
  end

  # Hash representation of the book, keyed by strings.
  def to_hash
    { "title" => @title,
      "source" => @source,
      "isbn" => @isbn,
      "derek_sivers_rating" => @derek_sivers_rating,
      "derek_sivers_summary" => @derek_sivers_summary }
  end

  private

  # Return the first capture group of +pattern+ in +source+, or raise with
  # the inspected input so the offending text shows up in the error.
  def extract_field(source, pattern)
    match = pattern.match(source)
    raise source.inspect unless match

    match[1]
  end
end
# Main script: fetch the source page, scrape every book entry and bulk-save
# the resulting hashes to the CouchDB database.

# The 'verbose' flag was declared but never used; honour it here.
STDERR.puts "Fetching #{options.source}" if options.verbose?

# URI.open (from open-uri) replaces the bare Kernel#open call, which no longer
# opens URLs in Ruby 3.0+ and hands a source starting with "|" to a subprocess.
# The block form closes the handle once the page is parsed.
doc = URI.open(options.source) { |page| Nokogiri::HTML(page) }
db = CouchRest.database!(options.database)

books = doc.css('#content div.book').map do |book|
  Book.new(book, options.source).to_hash
end

db.bulk_save books
STDERR.puts "#{books.length} saved."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment