Last active
December 22, 2015 15:49
-
-
Save pixelastic/6494754 to your computer and use it in GitHub Desktop.
Téléchargement de toutes les conférences Paris Web 2012 au format audio. À écouter sans modération.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tmp
*part
*mp4
*m4a
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
## Encoding : UTF-8 | |
require "shellwords" | |
require "digest/md5" | |
require "fileutils" | |
require "nokogiri" | |
require "open-uri" | |
require "addressable/uri" | |
# Downloads the audio track of every talk listed on the Paris Web 2012
# programme page.
#
# Pages are fetched once and cached under ./tmp. Every Dailymotion video
# embedded in a talk page is handed to youtube-dl for audio extraction, and
# each successfully processed URL is appended to ./tmp/done so that a later
# run skips it.
class DownloadParisWeb
  # Oldest youtube-dl release accepted by #check_dependencies.
  MINIMUM_YOUTUBE_DL_VERSION = "2013.09.07".freeze

  # Checks the external tools, then prepares the ./tmp working directory and
  # the file that lists the videos already downloaded.
  def initialize
    check_dependencies
    @list_url = "http://www.paris-web.fr/2012/"
    FileUtils.mkdir_p('./tmp')
    @done_file = File.expand_path('./tmp/done')
    # File.exist? replaces File.exists?, which Ruby 3.2 removed; FileUtils.touch
    # creates the file without spawning a shell.
    FileUtils.touch(@done_file) unless File.exist?(@done_file)
  end

  # Makes sure youtube-dl is installed and recent enough.
  # Prints an explanation and exits with a non-zero status otherwise.
  def check_dependencies
    if %x[which youtube-dl].empty?
      puts "You need to have youtube-dl installed to use this script"
      puts "Check https://github.com/rg3/youtube-dl"
      exit 1
    end
    if version_older_than?(%x[youtube-dl --version], MINIMUM_YOUTUBE_DL_VERSION)
      puts "Your version of youtube-dl is too old."
      puts "Run youtube-dl -U to update to the latest"
      exit 1
    end
  end

  # Given a url, returns the cache location on disk
  # (an absolute path of the form ./tmp/<MD5 of the url>.url).
  def get_cache_file(url)
    File.expand_path(File.join("tmp", "#{Digest::MD5.hexdigest(url)}.url"))
  end

  # Returns a webpage html source, using cache if found.
  # The cache never expires: once a page is stored it is always served from disk.
  def get_page_source(url)
    cache_file = get_cache_file(url)
    # File.read closes the handle, unlike File.open(...).read
    return File.read(cache_file) if File.exist?(cache_file)

    # Downloading page. Kernel#open stopped handling URLs in Ruby 3.0, so
    # open-uri has to be called through URI.open; the block form closes the stream.
    content = URI.open(url, &:read)
    File.write(cache_file, content)
    content
  end

  # Walks every talk page and downloads the audio of each embedded
  # Dailymotion video that has not been downloaded yet.
  def run
    links = talk_links
    done_links = File.readlines(@done_file).map(&:chomp)
    links.each do |link|
      subdoc = Nokogiri::HTML(get_page_source(link))
      # Getting title
      title = subdoc.css('title').text.gsub('Paris Web – ', '')
      dailymotion_urls(subdoc).each do |url|
        next if done_links.include?(url)

        # Downloading sound
        puts "Downloading #{title}"
        unless download_audio(url)
          # Not marked as done, so the next run tries this video again
          warn "Download failed for #{url}, it will be retried on the next run"
          next
        end
        # Marking file as downloaded, on disk and for the rest of this run
        mark_done(url)
        done_links << url
      end
    end
  end

  private

  # True when the dotted version string +version+ is strictly older than
  # +minimum+. Segments are compared as integers; surrounding whitespace
  # (such as the newline of a command output) is ignored.
  def version_older_than?(version, minimum)
    segments = ->(value) { value.strip.split('.').map(&:to_i) }
    (segments.call(version) <=> segments.call(minimum)).negative?
  end

  # Returns the href of every talk link found in the programme table.
  def talk_links
    doc = Nokogiri::HTML(get_page_source(@list_url))
    doc.css('table.programme a.url').map { |anchor| anchor['href'] }
  end

  # Returns the Dailymotion video URLs embedded as iframes in +subdoc+,
  # converted to a format youtube-dl can parse (no query string, no /embed).
  def dailymotion_urls(subdoc)
    subdoc.css('iframe').map do |iframe|
      uri = Addressable::URI.parse(iframe['src'])
      # parse(nil) gives nil for an iframe without src, hence the safe navigation
      next unless uri&.host == "www.dailymotion.com"

      uri.query = nil
      # Assign through the setter rather than mutating the string the getter returns
      uri.path = uri.path.gsub('/embed', '')
      uri.to_s
    end.compact
  end

  # Runs youtube-dl on +url+ and returns true only if it exited successfully.
  # The argument list form bypasses the shell, so a URL scraped from a page
  # cannot inject commands. Standard output is discarded, as it was when the
  # command ran inside backticks.
  def download_audio(url)
    system("youtube-dl", "-t", "-x", url, out: File::NULL)
  end

  # Appends +url+ to the list of downloaded videos.
  def mark_done(url)
    File.open(@done_file, "a") { |file| file.puts(url) }
  end
end
# Script entry point: build the downloader and process every talk.
DownloadParisWeb.new.run
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment