Last active
August 29, 2015 14:07
-
-
Save grekko/807b0aad3f70f925a37c to your computer and use it in GitHub Desktop.
Ruby Tapas Episode fetcher
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
export TAPAS_HTTP_COOKIE=
export HTTP_AUTH=
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source "https://rubygems.org"
gem "typhoeus"
gem "nokogiri"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
GEM
  remote: https://rubygems.org/
  specs:
    ethon (0.7.1)
      ffi (>= 1.3.0)
    ffi (1.9.6)
    mini_portile (0.6.0)
    nokogiri (1.6.3.1)
      mini_portile (= 0.6.0)
    typhoeus (0.6.9)
      ethon (>= 0.7.1)

PLATFORMS
  ruby

DEPENDENCIES
  nokogiri
  typhoeus
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'byebug' | |
require 'typhoeus' | |
require 'nokogiri' | |
require 'pathname' | |
# Downloads every episode file referenced by the Ruby Tapas RSS feed into a
# local directory, skipping files that are already present.
#
# Credentials are read from the environment when the class body is evaluated:
# TAPAS_HTTP_COOKIE is sent as the Cookie header and HTTP_AUTH as the
# Authorization header. Loading the class raises if either variable is unset.
class TapasDownloader
  HTTP_COOKIE = ENV.fetch('TAPAS_HTTP_COOKIE') do
    raise "Set a valid TAPAS_HTTP_COOKIE, e.g.\n`export TAPAS_HTTP_COOKIE=\"..\"`"
  end

  # The example deliberately uses a placeholder rather than real credentials.
  AUTHORIZATION = ENV.fetch('HTTP_AUTH') do
    raise "Set a valid HTTP_AUTH, e.g.\n`export HTTP_AUTH=\"Basic ..\"`"
  end

  # Headers sent with every request (feed and episode downloads alike).
  HEADERS = {
    'Accept' => 'text/html',
    'Accept-Language' => 'en-US,en;q=0.8',
    'Connection' => 'keep-alive',
    'Host' => 'rubytapas.dpdcart.com',
    'Cookie' => HTTP_COOKIE,
    'Authorization' => AUTHORIZATION,
    'User-Agent' => 'Tapas Downloader'
  }.freeze

  RSS_URL = 'https://rubytapas.dpdcart.com/feed'

  # Directory used when no (or a nil) download_dir is supplied.
  DEFAULT_DOWNLOAD_DIR = 'downloads'

  # @param download_dir [String, Pathname, nil] directory the episode files
  #   are written to; nil falls back to DEFAULT_DOWNLOAD_DIR.
  def initialize(download_dir: DEFAULT_DOWNLOAD_DIR)
    @hydra = Typhoeus::Hydra.new(max_concurrency: 5)
    @download_path = Pathname.new(download_dir || DEFAULT_DOWNLOAD_DIR)
  end

  # Fetches the RSS feed once and memoizes its body.
  #
  # @return [String] the raw feed body
  # @raise [RuntimeError] if the feed request does not return HTTP 200
  def feed
    @feed ||= begin
      response = request(RSS_URL).run
      raise "#{RSS_URL} returned #{response.code}" unless response.code == 200

      response.body
    end
  end

  # @return [Nokogiri::XML::Document] the parsed feed (memoized)
  def doc
    @doc ||= Nokogiri::XML(feed)
  end

  # @return [Array<String>] the url attribute of every item enclosure in the feed
  def episode_urls
    @urls ||= doc.xpath('//item/enclosure/@url').map(&:value)
  end

  # Queues a download for every episode that is not on disk yet and runs
  # the queue. Prints a notice instead when there is nothing to download.
  def fetch_all
    prepared_requests = filter_and_prepare_requests(episode_urls.map { |url| request(url) })

    if prepared_requests.empty?
      puts "No new files to download"
      return
    end

    # Make sure the target directory exists before any callback writes to it.
    setup_dir
    prepared_requests.each { |prepared| @hydra.queue(prepared) }
    puts "Starting download process for #{prepared_requests.size} files"
    @hydra.run
  end

  # Drops requests whose target file already exists and attaches the
  # completion callback to the remaining ones.
  #
  # @param requests [Array<Typhoeus::Request>]
  # @return [Array<Typhoeus::Request>] the requests that still need to run
  def filter_and_prepare_requests(requests)
    requests.each_with_object([]) do |request, pending|
      # The file name is the last path segment of the request URL.
      target_file = @download_path + File.basename(request.base_url)

      if target_file.exist?
        puts "Skipping #{target_file} since the file already exists."
        next
      end

      request.on_complete do |response|
        on_complete_handler(request, response, target_file)
      end
      pending << request
    end
  end

  # Writes a successfully downloaded response body to target_file.
  #
  # @param request [Typhoeus::Request]
  # @param response [Typhoeus::Response]
  # @param target_file [String, Pathname] path the body is written to
  # @raise [RuntimeError] if the response code is not 200
  def on_complete_handler(request, response, target_file)
    fail "Boo #{request.base_url}" unless response.code == 200

    puts "Finished: #{request.base_url}"
    File.binwrite(target_file, response.body)
    puts ".. written to #{target_file}.\n"
  end

  private

  # Creates the download directory (and any missing parents) if needed.
  def setup_dir
    @download_path.mkpath
  end

  # Builds a request carrying the shared HEADERS.
  #
  # @param url [String]
  # @param opts [Hash] extra Typhoeus request options; a :headers entry
  #   replaces the default headers
  # @return [Typhoeus::Request]
  def request(url, opts = {})
    Typhoeus::Request.new(url, { headers: HEADERS }.merge(opts))
  end

  # @param path [String] path relative to the directory containing this file
  # @return [String] the corresponding absolute path
  def absolute_path_for(path)
    File.expand_path("../#{path}", __FILE__)
  end
end
# Entry point: the first command-line argument, when present, is the directory
# to download into. Without an argument no download_dir is passed at all, so
# the constructor's own default applies instead of an explicit nil.
options = ARGV.empty? ? {} : { download_dir: ARGV.first }
downloader = TapasDownloader.new(**options)
downloader.fetch_all
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ruby tapas.rb "/Users/gregoryigelmund/Movies/Educational/ruby-tapas"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment