Created
January 25, 2015 05:23
-
-
Save bachue/078b4080515aed18ba73 to your computer and use it in GitHub Desktop.
OpenLanguage English tutorials downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'cgi'
require 'digest/md5'
require 'pathname'
require 'set'
require 'uri'

require 'httparty' # `gem install httparty`
require 'nokogiri' # `gem install nokogiri`
# Directory containing this script; downloads are stored beneath it.
ROOT = Pathname(File.expand_path(__dir__))
DOWNLOAD_PATH = ROOT.join 'Downloads'

# Account credentials. They can be supplied through the environment so they do
# not have to be written into the script; the placeholders remain the fallback.
USERNAME = ENV.fetch('OPENLANGUAGE_USERNAME', 'GIVE_ME_USERNAME_HERE').freeze
PASSWORD = ENV.fetch('OPENLANGUAGE_PASSWORD', 'GIVE_ME_PASSWORD_HERE').freeze

# Site root that every scraped (site-relative) link is appended to.
BASE = 'http://openlanguage.com'.freeze

# Value of the Cookie header sent with every page request: the URL-escaped
# e-mail address plus the MD5 hex digest of the password.
COOKIES = "login_status=1; email=#{CGI.escape USERNAME}; password=#{Digest::MD5.hexdigest PASSWORD}".freeze
# On Ctrl-C, remove the file that is currently being downloaded (if any) and
# quit. A half-written file left behind would otherwise be skipped as
# "already downloaded" on the next run, because handle_download only checks
# whether the target path exists.
trap('INT') do
  $downloading.delete if $downloading && $downloading.exist?
  exit
end
# Fetches one page of a lesson index and passes every lesson link found on it
# to handle_lesson. When called for page 1 (the default) it also reads the
# pagination links and then processes pages 2..last the same way.
#
# link - site-relative path of the index (appended to BASE).
# page - 1-based page number to fetch.
def handle_index link, page: 1
  url = "#{BASE}#{link}"
  puts "Fetch #{url}?page=#{page} ..."
  # URI.encode was removed in Ruby 3.0; the RFC 2396 parser performs the same escaping.
  escaped_url = URI::RFC2396_Parser.new.escape(url)
  response = HTTParty.get escaped_url, headers: {'Cookie' => COOKIES}, query: {page: page}
  html = Nokogiri::HTML(response.body)

  html.css('#list .col-xs-6 > a:first-child').each do |a|
    handle_lesson a.attributes['href'].value
  end
  return unless page == 1

  # Non-numeric pagination labels become 0 via to_i and never win `max`.
  # Without any pagination links `max` is nil; falling back to 1 avoids
  # building the endless range (2..nil), which would request pages forever.
  page_count = html.css('ul.pagination a').map {|a| a.text.to_i }.max || 1
  (2..page_count).each do |num|
    handle_index link, page: num
  end
end
# Fetches a single lesson page, creates a directory named after the lesson's
# <h1> text under DOWNLOAD_PATH and passes every link inside
# #lesson-downloads to handle_download.
#
# Any StandardError is reported on STDERR (URL, message, backtrace) and
# swallowed so that one broken lesson does not abort the whole crawl.
#
# link - site-relative path of the lesson page (appended to BASE).
def handle_lesson link
  url = "#{BASE}#{link}"
  puts "Fetch #{url} ..."
  # URI.encode was removed in Ruby 3.0; the RFC 2396 parser performs the same escaping.
  escaped_url = URI::RFC2396_Parser.new.escape(url)
  response = HTTParty.get escaped_url, headers: {'Cookie' => COOKIES}
  html = Nokogiri::HTML(response.body)

  name = html.css('h1').text
  # With an empty title the lesson directory would be DOWNLOAD_PATH itself and
  # files of different lessons would collide; report the page instead.
  raise "No lesson title found on #{url}" if name.strip.empty?

  DOWNLOAD_PATH.join(name).mkpath
  html.css('#lesson-downloads a').each do |a|
    handle_download a.attributes['title'].value, a.attributes['href'].value, name
  end
rescue => e
  STDERR.puts "Error on #{url}"
  STDERR.puts e.message
  Array(e.backtrace).each {|line| STDERR.puts line }
end
# Downloads one lesson attachment with wget unless the target file already
# exists. The file is stored as DOWNLOAD_PATH/<name>/<title>.<ext>, where the
# extension is derived from the link title.
#
# While wget runs, $downloading holds the target path so the INT handler can
# remove a partial file. It is always cleared afterwards, also when wget fails.
#
# Any StandardError (including an unrecognized title) is reported on STDERR
# and swallowed so the crawl continues.
#
# title - text of the link's title attribute; selects the file extension.
# link  - site-relative path of the attachment (appended to BASE).
# name  - lesson name, used as the directory below DOWNLOAD_PATH.
def handle_download title, link, name
  ext = case title
        when 'Printout' then 'pdf'
        when 'Lesson Audio', 'Dialogue', 'Vocab Review' then 'mp3'
        when 'Practice Materials' then 'ppt'
        else raise "Can't recognize this link: #{title} #{link}"
        end
  path = DOWNLOAD_PATH.join name, "#{title}.#{ext}"
  return if path.exist?

  link = "#{BASE}#{link}"
  puts "Download from #{link} ..."
  $downloading = path
  begin
    # Argument-list form of system: no shell is involved.
    unless system 'wget', '--tries=10', '-O', path.to_s, link
      STDERR.puts "Download failed: #{link}"
      # Remove whatever wget wrote so the next run retries this file.
      path.delete if path.exist?
    end
  ensure
    $downloading = nil
  end
rescue => e
  STDERR.puts "Error on #{link}"
  STDERR.puts e.message
  Array(e.backtrace).each {|line| STDERR.puts line }
end
# Entry point: crawl the "latest" lesson index and download every lesson's attachments.
handle_index '/library/learn-english/9/latest'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment