Skip to content

Instantly share code, notes, and snippets.

@bachue
Created January 25, 2015 05:23
Show Gist options
  • Save bachue/078b4080515aed18ba73 to your computer and use it in GitHub Desktop.
Save bachue/078b4080515aed18ba73 to your computer and use it in GitHub Desktop.
OpenLanguage English tutorials downloader
require 'httparty' # `gem install httparty`
require 'cgi'
require 'digest/md5'
require 'nokogiri' # `gem install nokogiri`
require 'pathname'
require 'set'
# --- Configuration -----------------------------------------------------------

# Credentials for the OpenLanguage account; replace the placeholders before running.
USERNAME = 'GIVE_ME_USERNAME_HERE'
PASSWORD = 'GIVE_ME_PASSWORD_HERE'

# Site root; every link handled by this script is a path appended to it.
BASE = 'http://openlanguage.com'

# Directory containing this script, and the folder (next to it) that receives
# one sub-directory per downloaded lesson.
ROOT = Pathname.new(File.expand_path(__dir__))
DOWNLOAD_PATH = ROOT.join('Downloads')

# Cookie header sent with every request: a login flag, the URL-escaped user
# name and the MD5 hex digest of the password.
COOKIES = [
  'login_status=1',
  "email=#{CGI.escape(USERNAME)}",
  "password=#{Digest::MD5.hexdigest(PASSWORD)}"
].join('; ')
# On Ctrl-C, remove the file recorded in $downloading (the download that was
# in progress, if any) so no partial file is left behind, then quit.
Signal.trap('INT') do
  partial = $downloading
  partial.delete if partial && partial.exist?
  exit
end
# Fetches one page of a lesson index and processes every lesson linked from it.
#
# When called for page 1 (the default) it also reads the pagination links to
# find the highest page number and then calls itself for pages 2..N. Calls for
# any other page process only that page.
#
# @param link [String] path of the index, appended to BASE
# @param page [Integer] page number sent as the `page` query parameter
# @return [void]
def handle_index(link, page: 1)
  url = "#{BASE}#{link}"
  puts "Fetch #{url}?page=#{page} ..."
  # URI.encode was removed in Ruby 3.0; the parser's #escape does the same job.
  response = HTTParty.get URI::DEFAULT_PARSER.escape(url),
                          headers: { 'Cookie' => COOKIES },
                          query: { page: page }
  html = Nokogiri::HTML response.body

  html.css('#list .col-xs-6 > a:first-child').each do |a|
    href = a['href']
    # An anchor without href cannot be followed; skip it instead of crashing.
    next unless href

    handle_lesson href
  end
  return unless page == 1

  # With no pagination links `max` is nil; fall back to a single page, because
  # (2..nil) is an endless range on modern Ruby and would never terminate.
  page_count = html.css('ul.pagination a').map { |a| a.text.to_i }.max || 1
  (2..page_count).each { |num| handle_index link, page: num }
end
# Fetches a lesson page, creates a directory named after its <h1> text under
# DOWNLOAD_PATH, and hands every link inside #lesson-downloads to
# handle_download.
#
# Any StandardError raised while processing the lesson is reported on STDERR
# (URL, message, backtrace) and swallowed so the crawl continues.
#
# @param link [String] path of the lesson page, appended to BASE
# @return [void]
def handle_lesson(link)
  url = "#{BASE}#{link}"
  puts "Fetch #{url} ..."
  # URI.encode was removed in Ruby 3.0; the parser's #escape does the same job.
  response = HTTParty.get URI::DEFAULT_PARSER.escape(url), headers: { 'Cookie' => COOKIES }
  html = Nokogiri::HTML response.body
  name = html.css('h1').text
  DOWNLOAD_PATH.join(name).mkpath
  html.css('#lesson-downloads a').each do |a|
    handle_download a.attributes['title'].value, a.attributes['href'].value, name
  end
rescue => e
  STDERR.puts "Error on #{url}"
  STDERR.puts e.message
  # One backtrace frame per line.
  Array(e.backtrace).each { |frame| STDERR.puts frame }
end
# Downloads one lesson attachment with wget unless the target file already
# exists.
#
# The file extension is chosen from the link title; an unknown title raises,
# which is caught below and reported like any other error. While wget runs the
# target path is stored in $downloading so the INT handler can delete a
# partial file.
#
# @param title [String] link title, also used as the file's base name
# @param link [String] path of the file, appended to BASE
# @param name [String] lesson name, i.e. the sub-directory of DOWNLOAD_PATH
# @return [void]
def handle_download(title, link, name)
  ext = case title
        when 'Printout' then 'pdf'
        when 'Lesson Audio', 'Dialogue', 'Vocab Review' then 'mp3'
        when 'Practice Materials' then 'ppt'
        else fail "Can't recognize this link: #{title} #{link}"
        end
  path = DOWNLOAD_PATH.join name, "#{title}.#{ext}"
  return if path.exist?

  link = "#{BASE}#{link}"
  puts "Download from #{link} ..."
  $downloading = path
  succeeded = system 'wget', '--tries=10', '-O', path.to_s, link
  # `wget -O` creates the output file up front, so a failed run leaves a
  # truncated file that would wrongly count as "already downloaded" next time.
  path.delete if !succeeded && path.exist?
  # Nothing is in flight any more, whether wget succeeded or not.
  $downloading = nil
rescue => e
  STDERR.puts "Error on #{link}"
  STDERR.puts e.message
  # One backtrace frame per line.
  Array(e.backtrace).each { |frame| STDERR.puts frame }
end
# Entry point: crawl the index at this path, starting from its first page.
handle_index('/library/learn-english/9/latest')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment