Created
June 17, 2009 02:08
-
-
Save davekaro/131033 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'mechanize' | |
require 'nokogiri' | |
require 'open-uri' | |
# Returns the index of the first match of +pattern+ in +html+.
#
# Raises ArgumentError naming +label+ when the pattern does not occur, so a
# changed or unexpected page layout fails with a readable message instead of
# a NoMethodError on nil.
def marker_index(html, pattern, label)
  index = html =~ pattern
  raise ArgumentError, "could not find #{label} in the page HTML" unless index
  index
end

# Extracts the video title and builds the get_video download URL from the
# HTML of a YouTube watch page.
#
# html - String containing the watch page markup. It must contain a
#        <title>...</title> element and the "t", "video_id": and
#        isHDAvailable markers that the fixed offsets below rely on.
#
# Returns a Hash with :title (the page title with "/" and ":" replaced by
# "-") and :url (the download URL).
# Raises ArgumentError when a required marker is missing or the title is too
# short to carry the expected prefix.
def build_url(html)
  title_begin = marker_index(html, /<title>/, "<title>")
  title_end = marker_index(html, /<\/title>/, "</title>")

  # get the title - ignore the YouTube prefix:
  # "<title>" is 7 characters and "YouTube - " is 10, hence the offset of 17
  title_length = (title_end - title_begin) - 17
  if title_length < 0
    raise ArgumentError, "page title is too short to carry the YouTube prefix"
  end
  title = html[title_begin + 17, title_length]

  # need this "t" param based on bookmarklet from
  # http://googlesystem.blogspot.com/2008/04/download-youtube-videos-as-mp4-files.html
  # (+6 skips past `"t": "`; the value is read as a fixed 44 characters)
  t = html[marker_index(html, /"t"/, '"t"') + 6, 44]

  # +13 skips past `"video_id": "`; ids are read as a fixed 11 characters
  video_id = html[marker_index(html, /"video_id":/, '"video_id":') + 13, 11]

  # +16 skips past "isHDAvailable = "; the next 5 characters hold the flag
  hd_flag = html[marker_index(html, /isHDAvailable/, "isHDAvailable") + 16, 5]
  fmt = hd_flag.to_s.include?("true") ? "22" : "18"

  url = "http://www.youtube.com/get_video?fmt=#{fmt}" \
        "&video_id=#{video_id}" \
        "&t=#{t}"

  # "/" and ":" are replaced because the title is later used as a file name
  { :title => title.tr("/:", "-"), :url => url }
end
agent = WWW::Mechanize.new

# youtube_video_ids.txt just contains the ids (the part of the url after the
# "v=" when watching in your browser), one per line.
# File.foreach closes the id file once iteration finishes.
File.foreach("youtube_video_ids.txt") do |line|
  id = line.strip
  next if id.empty? # a blank line would otherwise request an empty video id

  video_url = "http://www.youtube.com/watch?v=#{id}"
  # NOTE(review): opening a URL through Kernel#open relies on open-uri and is
  # not supported on Ruby 3.0+; switch to URI.open when running a newer Ruby.
  html = Nokogiri::HTML(open(video_url))
  video = build_url(html.to_s)
  page = agent.get(video[:url])

  # Block form closes (and flushes) the output file; "wb" keeps the video
  # bytes untouched on platforms that translate line endings in text mode.
  File.open("/path/to/downloads/#{video[:title]}.mp4", "wb") do |out|
    out.write(page.body)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment