Last active
December 14, 2015 13:58
-
-
Save stansidel/5097413 to your computer and use it in GitHub Desktop.
This script parses the Rails Tutorial pages for code snippets, code listings, and sidebar boxes, and writes them to two files (listings.html and boxes.html).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'rubygems' | |
require 'open-uri' | |
require 'nokogiri' | |
# Crawl state shared with the main loop further down the script.
#
# current_uri - page fetched on the next loop iteration; starts at the first chapter.
# next_uri    - href of the "Next Chapter" link on the page just read. Seeded with
#               `true` only so the loop condition passes before any page is fetched.
# depth       - upper bound on the number of pages fetched in one run.
current_uri = 'http://ruby.railstutorial.org/chapters/beginning#top'
next_uri = true
depth = 13
# Opening HTML shared by both output files: doctype, <head> (jQuery, the
# show/hide script, the tutorial's stylesheets) and the four wrapper <div>s
# (#container, #content, #book_wrap, #book) that are left open for the
# per-chapter content written by the main loop.
#
# The embedded script hides every `.chapter-content` block, removes headings
# whose content block is empty, toggles a chapter when its `a.heading` link is
# clicked, and toggles the plain `.code` blocks via the `#listings-only` link.
#
# A raw heredoc (<<'HTML') is used on purpose: the template is full of `$` and
# `#` characters (jQuery, CSS colours, anchors), and nothing in it is meant to
# be interpolated or unescaped by Ruby.
head = <<'HTML'
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html lang='en' xml:lang='en' xmlns='http://www.w3.org/1999/xhtml'>
<head>
<title>Rails Tutorial 3 - Listings</title>
<script type="text/javascript" src="http://code.jquery.com/jquery-1.9.1.min.js"></script>
<script type="text/javascript">
$(document).ready(function(){
var $simple_codes = $(".chapter-content > .code");
$('.chapter-content').hide();
$('a.heading').each(function(n, el){
var $element = $(el).parent(),
$node = $(el).parent().next('.chapter-content');
if($node.is(':empty')) {
$element.remove();
$node.remove();
}
}).on('click', function(e) {
e.preventDefault();
$(this).parent().next().toggle();
});
$('#listings-only').on('click', function(e) {
e.preventDefault();
$simple_codes.toggle();
});
});
</script>
<style>
.side-fixed {
background: white;
position: fixed;
top: 10px;
left: 10px;
padding: 10px;
}
</style>
<link href="http://ruby.railstutorial.org/stylesheets/polytexnic.css?1361563564" media="screen" rel="stylesheet" type="text/css" />
<link href="http://ruby.railstutorial.org/stylesheets/pygments.css?1361563564" media="screen" rel="stylesheet" type="text/css" />
<link href="http://ruby.railstutorial.org/stylesheets/screen.css?1361563564" media="screen, projection" rel="stylesheet" type="text/css" />
<link href="http://ruby.railstutorial.org/stylesheets/print.css?1361563564" media="print" rel="stylesheet" type="text/css" />
<!--[if lte IE 7]><link href="http://ruby.railstutorial.org/stylesheets/ie.css?1361563564" media="screen" rel="stylesheet" type="text/css" /><![endif]-->
<!--[if lt IE 7]> <div style='border: 1px solid #F7941D; background: #FEEFDA; text-align: center; clear: both; height: 75px; position: relative;'> <div style='position: absolute; right: 3px; top: 3px; font-family: courier new; font-weight: bold;'><a href='#' onclick='javascript:this.parentNode.parentNode.style.display="none"; return false;'><img src='http://www.ie6nomore.com/files/theme/ie6nomore-cornerx.jpg' style='border: none;' alt='Close this notice'/></a></div> <div style='width: 640px; margin: 0 auto; text-align: left; padding: 0; overflow: hidden; color: black;'> <div style='width: 75px; float: left;'><img src='http://www.ie6nomore.com/files/theme/ie6nomore-warning.jpg' alt='Warning!'/></div> <div style='width: 275px; float: left; font-family: Arial, sans-serif;'> <div style='font-size: 14px; font-weight: bold; margin-top: 12px;'>You are using an outdated browser</div> <div style='font-size: 12px; margin-top: 6px; line-height: 12px;'>For a better experience using this site, please upgrade to a modern web browser.</div> </div> <div style='width: 75px; float: left;'><a href='http://www.firefox.com' target='_blank'><img src='http://www.ie6nomore.com/files/theme/ie6nomore-firefox.jpg' style='border: none;' alt='Get Firefox 3.5'/></a></div> <div style='width: 75px; float: left;'><a href='http://www.browserforthebetter.com/download.html' target='_blank'><img src='http://www.ie6nomore.com/files/theme/ie6nomore-ie8.jpg' style='border: none;' alt='Get Internet Explorer 8'/></a></div> <div style='width: 73px; float: left;'><a href='http://www.apple.com/safari/download/' target='_blank'><img src='http://www.ie6nomore.com/files/theme/ie6nomore-safari.jpg' style='border: none;' alt='Get Safari 4'/></a></div> <div style='float: left;'><a href='http://www.google.com/chrome' target='_blank'><img src='http://www.ie6nomore.com/files/theme/ie6nomore-chrome.jpg' style='border: none;' alt='Get Google Chrome'/></a></div> </div> </div> <![endif]-->
<link href='http://feeds.feedburner.com/railstutorial' rel='alternate' title='Rails Tutorial News' type='application/rss+xml' />
</head>
<body class="book">
<div id="container">
<div id="content">
<div id="book_wrap">
<div id="book">
HTML
# Closing HTML shared by both output files.
#
# It closes exactly the four wrapper <div>s that `head` leaves open
# (#book, #book_wrap, #content, #container); each per-chapter
# `.chapter-content` div is closed by the main loop itself. The previous
# version emitted six `</div>`, i.e. two stray end tags.
tail = <<'HTML'
</div></div></div></div>
</body>
</html>
HTML
# Crawl the tutorial page by page and write two HTML digests:
#   listings.html - the `.code` blocks directly under #book plus every
#                   `.codelisting`, grouped under the chapter heading
#   boxes.html    - every `.sidebar` box, grouped the same way
#
# Relies on `head`, `tail`, `current_uri`, `next_uri` and `depth` set up earlier
# in the script. Block-form File.open closes both files even when a fetch or
# parse raises part-way through the crawl.
File.open('listings.html', 'w') do |listings|
  File.open('boxes.html', 'w') do |boxes|
    listings.write(head)
    listings.write('<div class="side-fixed"><a href="#" id="listings-only">Listings only</a></div>')
    boxes.write(head)

    while depth > 0 && next_uri
      puts "Reading #{current_uri}"

      # URI.open is open-uri's explicit entry point (Kernel#open stopped
      # accepting URLs in Ruby 3.0); the block form also closes the fetched IO.
      doc = URI.open(current_uri) { |page| Nokogiri::HTML(page) }

      # Recompute the link for *this* page. Without the reset, a page that has
      # no "Next Chapter" image kept the previous truthy value, so the same
      # page was fetched and written again until `depth` ran out.
      next_link = doc.at_xpath('//img[@alt="Next Chapter"][1]')
      next_uri = next_link ? next_link.parent['href'] : nil

      # Open the collapsible wrapper only when the page has a chapter heading,
      # and remember that so the matching </div> is written only in that case.
      heading = doc.at_xpath('//h1[@class="chapter"][1]')
      if heading
        opener = "#{heading}<div class=\"chapter-content\">"
        listings.write(opener)
        boxes.write(opener)
      end

      doc.xpath('//div[@id="book"]/div[@class="code"] | //div[@class="codelisting"]').each do |node|
        text = node.text
        # Skip the licence boilerplate blocks.
        next if text.include?('MIT License') || text.include?('BEER-WARE')

        listings.write(node)
      end

      doc.css('.sidebar').each { |sidebar| boxes.write(sidebar) }

      if heading
        listings.write('</div>')
        boxes.write('</div>')
      end

      # Resolve the (possibly relative) href against the page it came from.
      current_uri = URI.join(current_uri, next_uri).to_s if next_uri
      depth -= 1
    end

    listings.write(tail)
    boxes.write(tail)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment