jellybob · September 29, 2012 21:38
diff --git a/download_harrington.rb b/download_harrington.rb
 # Go to http://www.baen.com/series_list.asp?letter=H and find the book ID for the one you
 # want, and replace the book variable with it. Run this program, and add the resulting
 # index.html to Calibre, which can then convert it to a .mobi. Finally, email the .mobi
 # to your Kindle and enjoy.
 require 'capybara'
 require 'capybara/dsl'
 require 'capybara/webkit'
 require 'fileutils'

 # Baen book ID to download; it is also used as the output directory name.
 book = "0743435710"

 FileUtils.mkdir_p(book)

 # Expose the Capybara DSL (visit, all, find, ...) at the top level, drive
 # pages through capybara-webkit, and resolve bare selectors as CSS.
 include Capybara::DSL
 Capybara.current_driver = :webkit
 Capybara.default_selector = :css

 # Text of the <h1> element on the page currently loaded in the Capybara session.
 def chapter_title
   heading = find("h1")
   heading.text
 end

 # Scrape the book's table of contents, save every chapter next to an
 # index.html that links to the saved copies, and report progress on stdout.
 #
 # Uses `book` (the book ID / output directory) and `chapter_title` from above.

 # No trailing slash here: every URL below joins with "/" itself, so a trailing
 # slash would produce "//" in each request.
 base = "http://www.baenebooks.com/chapters/#{book}"
 visit "#{base}/#{book}_toc.htm"

 # Copy link text/href into plain hashes before navigating away from the TOC.
 # Only anchors that carry an href are considered, and the Back/Next
 # navigation links are dropped.
 chapters = all("a[href]").reject { |link|
   %w{Back Next}.include?(link.text)
 }.map { |link|
   { title: link.text, href: link["href"] }
 }

 book_title = chapter_title
 File.open("#{book}/index.html", "w") do |index|
   index << "<html><body><h1>#{book_title}</h1><ul>"

   chapters.each_with_index do |chapter, chapter_number|
     chapter_url = "#{base}/#{chapter[:href]}"
     chapter_file = "#{book}/chapter_#{chapter_number}.html"

     # Load the chapter in the browser only to read its <h1> for the index entry.
     visit chapter_url
     print "Chapter #{chapter_number + 1}/#{chapters.size}\r"
     index << %Q{<li><a href="chapter_#{chapter_number}.html">#{chapter_title}</a></li>}

     # I redownload with curl here because Capybara was having trouble with non-ASCII characters.
     # The arguments are passed as a list so no shell is involved: a scraped href
     # containing quotes or shell metacharacters cannot break or hijack the command.
     unless system("curl", "-s", "-o", chapter_file, chapter_url)
       warn "\nFailed to download #{chapter_url}"
     end
   end

   index << "</ul></body></html>"
 end
 puts ""
	# Go to http://www.baen.com/series_list.asp?letter=H and find the book ID for the one you
	# want, and replace the book variable with it. Run this program, and add the resulting
	# index.html to Calibre, which can then convert it to a .mobi. Finally, email the .mobi
	# to your Kindle and enjoy.
	require 'capybara'
	require 'capybara/dsl'
	require 'capybara/webkit'
	require 'fileutils'

	# Baen book ID to download; it is also used as the output directory name.
	book = "0743435710"

	FileUtils.mkdir_p(book)

	# Expose the Capybara DSL (visit, all, find, ...) at the top level, drive
	# pages through capybara-webkit, and resolve bare selectors as CSS.
	include Capybara::DSL
	Capybara.current_driver = :webkit
	Capybara.default_selector = :css

	# Text of the <h1> element on the page currently loaded in the Capybara session.
	def chapter_title
	  heading = find("h1")
	  heading.text
	end

	# Scrape the book's table of contents, save every chapter next to an
	# index.html that links to the saved copies, and report progress on stdout.
	#
	# Uses `book` (the book ID / output directory) and `chapter_title` from above.

	# No trailing slash here: every URL below joins with "/" itself, so a trailing
	# slash would produce "//" in each request.
	base = "http://www.baenebooks.com/chapters/#{book}"
	visit "#{base}/#{book}_toc.htm"

	# Copy link text/href into plain hashes before navigating away from the TOC.
	# Only anchors that carry an href are considered, and the Back/Next
	# navigation links are dropped.
	chapters = all("a[href]").reject { |link|
	  %w{Back Next}.include?(link.text)
	}.map { |link|
	  { title: link.text, href: link["href"] }
	}

	book_title = chapter_title
	File.open("#{book}/index.html", "w") do |index|
	  index << "<html><body><h1>#{book_title}</h1><ul>"

	  chapters.each_with_index do |chapter, chapter_number|
	    chapter_url = "#{base}/#{chapter[:href]}"
	    chapter_file = "#{book}/chapter_#{chapter_number}.html"

	    # Load the chapter in the browser only to read its <h1> for the index entry.
	    visit chapter_url
	    print "Chapter #{chapter_number + 1}/#{chapters.size}\r"
	    index << %Q{<li><a href="chapter_#{chapter_number}.html">#{chapter_title}</a></li>}

	    # I redownload with curl here because Capybara was having trouble with non-ASCII characters.
	    # The arguments are passed as a list so no shell is involved: a scraped href
	    # containing quotes or shell metacharacters cannot break or hijack the command.
	    unless system("curl", "-s", "-o", chapter_file, chapter_url)
	      warn "\nFailed to download #{chapter_url}"
	    end
	  end

	  index << "</ul></body></html>"
	end
	puts ""
No results found