Skip to content

Instantly share code, notes, and snippets.

@grekko
Last active August 29, 2015 14:07
Show Gist options
  • Save grekko/807b0aad3f70f925a37c to your computer and use it in GitHub Desktop.
Save grekko/807b0aad3f70f925a37c to your computer and use it in GitHub Desktop.
Ruby Tapas Episode fetcher
export TAPAS_HTTP_COOKIE=
export HTTP_AUTH=
source "https://rubygems.org"
gem "typhoeus"
gem "nokogiri"
GEM
remote: https://rubygems.org/
specs:
ethon (0.7.1)
ffi (>= 1.3.0)
ffi (1.9.6)
mini_portile (0.6.0)
nokogiri (1.6.3.1)
mini_portile (= 0.6.0)
typhoeus (0.6.9)
ethon (>= 0.7.1)
PLATFORMS
ruby
DEPENDENCIES
nokogiri
typhoeus
require 'byebug'
require 'typhoeus'
require 'nokogiri'
require 'pathname'
# Downloads every episode file referenced by the RubyTapas RSS feed.
#
# Credentials are read from the environment when the class is loaded:
# TAPAS_HTTP_COOKIE is sent as the Cookie header and HTTP_AUTH as the
# Authorization header. Loading fails immediately if either is unset.
class TapasDownloader
  HTTP_COOKIE = ENV['TAPAS_HTTP_COOKIE'] || fail("Set a valid TAPAS_HTTP_COOKIE, e.g.\n`export TAPAS_HTTP_COOKIE=\"..\"`")
  AUTHORIZATION = ENV['HTTP_AUTH'] || fail("Set a valid HTTP_AUTH, e.g.\n`export HTTP_AUTH=\"Basic Z3JlZ29yeS5pZ2VsbXVuZEBnbWFpbC5jb..\"`")

  # Headers sent with every request (feed and episode files alike).
  # Frozen because the same hash is shared by all requests.
  HEADERS = {
    'Accept' => 'text/html',
    'Accept-Language' => 'en-US,en;q=0.8',
    'Connection' => 'keep-alive',
    'Host' => 'rubytapas.dpdcart.com',
    'Cookie' => HTTP_COOKIE,
    'Authorization' => AUTHORIZATION,
    'User-Agent' => 'Tapas Downloader'
  }.freeze

  RSS_URL = 'https://rubytapas.dpdcart.com/feed'

  # Directory used when no (or a nil) download_dir is given.
  DEFAULT_DOWNLOAD_DIR = 'downloads'

  # @param download_dir [String, nil] directory the episode files are written
  #   to; nil (e.g. a missing CLI argument) falls back to DEFAULT_DOWNLOAD_DIR.
  def initialize(download_dir: DEFAULT_DOWNLOAD_DIR)
    @hydra = Typhoeus::Hydra.new(max_concurrency: 5)
    @download_path = Pathname.new(download_dir || DEFAULT_DOWNLOAD_DIR)
  end

  # Fetches the RSS feed once and memoizes its body.
  #
  # @return [String] the raw feed body
  # @raise [RuntimeError] if the feed request does not answer with 200
  def feed
    @feed ||= begin
      response = request(RSS_URL).run
      raise "#{RSS_URL} returned #{response.code}" unless response.code == 200

      response.body
    end
  end

  # @return [Nokogiri::XML::Document] the parsed feed (memoized)
  def doc
    @doc ||= Nokogiri::XML(feed)
  end

  # @return [Array<String>] the url attribute of every item enclosure in the feed
  def episode_urls
    @episode_urls ||= doc.xpath('//item/enclosure/@url').map(&:value)
  end

  # Queues a download for every episode that is not on disk yet and runs
  # the queue. Prints a notice and does nothing else when there is nothing
  # to download.
  def fetch_all
    pending = filter_and_prepare_requests(episode_urls.map { |url| request(url) })
    if pending.empty?
      puts "No new files to download"
      return
    end

    # Without this the first write fails when the target directory is missing.
    setup_dir
    pending.each { |req| @hydra.queue(req) }
    puts "Starting download process for #{pending.size} files"
    @hydra.run
  end

  # Drops requests whose target file already exists and attaches the
  # completion handler (which writes the file) to the remaining ones.
  #
  # @param requests [Array<#base_url, #on_complete>] candidate requests
  # @return [Array] the requests that still have to be run
  def filter_and_prepare_requests(requests)
    requests.each_with_object([]) do |req, pending|
      # The target file name is the last path segment of the request URL.
      target_file = @download_path + File.basename(req.base_url)
      if target_file.exist?
        puts "Skipping #{target_file} since the file already exists."
        next
      end

      req.on_complete { |response| on_complete_handler(req, response, target_file) }
      pending << req
    end
  end

  # Writes a finished download to disk.
  #
  # @param request [#base_url] the request that completed
  # @param response [#code, #body] its response
  # @param target_file [Pathname, String] where the body is written
  # @raise [RuntimeError] if the response code is not 200
  def on_complete_handler(request, response, target_file)
    raise "Boo #{request.base_url}" unless response.code == 200

    puts "Finished: #{request.base_url}"
    # File.binwrite, unlike IO.binwrite, never interprets a leading "|" in
    # the path as a command to spawn.
    File.binwrite target_file, response.body
    puts ".. written to #{target_file}.\n"
  end

  private

  # Creates the download directory, including missing parents; a no-op when
  # it already exists.
  def setup_dir
    @download_path.mkpath
  end

  # Builds (but does not run) a request carrying the shared HEADERS.
  #
  # @param url [String] the URL to request
  # @param opts [Hash] extra request options; they are merged over the
  #   defaults, so a :headers entry replaces HEADERS entirely
  def request(url, opts = {})
    Typhoeus::Request.new(url, { headers: HEADERS }.merge(opts))
  end

  # @param path [String] a path relative to the directory containing this file
  # @return [String] the corresponding absolute path
  def absolute_path_for(path)
    File.expand_path("../#{path}", __FILE__)
  end
end
# Script entry point: the optional first CLI argument is the download
# directory. Fall back to 'downloads' explicitly, because passing a nil
# ARGV.first would override the keyword default with nil.
downloader = TapasDownloader.new(download_dir: ARGV.first || 'downloads')
downloader.fetch_all
ruby tapas.rb "/Users/gregoryigelmund/Movies/Educational/ruby-tapas"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment