Created
March 28, 2009 06:53
-
-
Save xaicron/87042 to your computer and use it in GitHub Desktop.
youtube videos download script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!C:/Perl/bin/perl
# Download a YouTube video in the best quality advertised by its watch page.
# High-resolution formats such as fmt=35 work as well.
#
# Usage: script.pl <youtube_watch_url>
#
# Exit status: 0 when the video was saved, non-zero otherwise.
use strict;
use warnings;
use utf8;
use Encode;
use LWP::UserAgent;
use File::Basename;
use Web::Scraper;
use URI;
use JSON qw/from_json/;
use HTML::Entities ();    # decode_entities() is called below, so load it explicitly

# Progress lines end in "\r" rather than "\n"; unbuffer STDOUT so they show up.
$| = 1;

# Encoding used for the local file name.
my $enc = 'cp932';

# Command-line argument: URL of the watch page.
my $url = shift @ARGV;
die "Usage: $0 youtube_url\n" unless $url;

# Scrape the <head> scripts (they carry the player arguments) and the title.
my $uri     = URI->new($url);
my $scraper = scraper {
    process '/html/head/script', 'scripts[]' => 'html';
    process '//*[@id="watch-vid-title"]/h1', title => 'TEXT';
};
my $result = $scraper->scrape($uri) or die "Oops!";

# Locate the "var swfArgs = {...};" assignment and pull out its JSON object.
# Scripts are joined with a newline so that a statement at the start of one
# script cannot be glued onto the last line of the previous one.
my $json;
my @script_lines = split /\n/, join "\n", @{ $result->{scripts} || [] };
for my $line (@script_lines) {
    # Braces are escaped: a bare "{" in a pattern is rejected by newer perls.
    if ($line =~ /^\s*var\s*swfArgs\s*=\s*(\{.*\});/) {
        # The scraper returns markup, so entities must be decoded first.
        $json = HTML::Entities::decode_entities($1);
        last;
    }
}
die "swfArgs not found in page: $url\n" unless defined $json;
my $swfArgs = from_json $json;

for my $key (qw/video_id t/) {
    die "swfArgs has no '$key' entry\n" unless defined $swfArgs->{$key};
}

# Pick the first usable entry of fmt_map (entries are comma separated and
# start with "<fmt>/<number>").
my $fmt;
my $fmt_map = defined $swfArgs->{fmt_map} ? $swfArgs->{fmt_map} : '';
for my $map (split /,/, $fmt_map) {
    # Skip malformed entries instead of building a URL from a stale $1.
    next unless $map =~ m{^(\d+)/(\d+)};
    # A second number of 0 apparently means the format does not exist.
    next if $2 eq '0';
    $fmt = "&fmt=$1";
    last;
}

# Fall back to fmt=18 when nothing usable was advertised.
$fmt = '&fmt=18' unless $fmt;

# File extension: formats 18 and 22 are saved as .mp4, everything else as .flv.
my $suffix = $fmt =~ /=(?:18|22)\z/ ? '.mp4' : '.flv';

# Download URL.
my $video_url = sprintf "http://www.youtube.com/get_video?video_id=%s&t=%s%s",
    $swfArgs->{video_id}, $swfArgs->{t}, $fmt;

# Name of the file to save. The title comes from the page, so strip
# surrounding whitespace and replace characters that are not allowed in
# Windows file names; fall back to the video id if no title was scraped.
my $title = defined $result->{title} ? $result->{title} : '';
$title =~ s/^\s+|\s+$//g;
$title =~ tr{\\/:*?"<>|}{_};
$title = $swfArgs->{video_id} unless length $title;

# Characters that cannot be represented in $enc become "_" rather than the
# default "?" substitute, which is itself illegal in a Windows file name.
my $name     = $title . $suffix;
my $filename = encode $enc, $name, sub { '_' };

# Stop if the file already exists.
die "File exists ($filename)" if -e $filename;

# Download while showing progress.
open my $wfh, '>', $filename or die "$filename: $!";
binmode $wfh;
print "$video_url\n";
print "Downloading -> $filename\n";
my $res = LWP::UserAgent->new->get(
    $video_url,
    ':content_cb' => sub {
        my ( $chunk, $response ) = @_;
        # A die here makes LWP abort the transfer and record the message
        # in the X-Died response header, which is checked below.
        print {$wfh} $chunk or die "$filename: write failed: $!\n";
        my $size = tell $wfh;
        if (my $total = $response->header('Content-Length')) {
            printf "%d/%d (%f%%)\r", $size, $total, $size/$total * 100;
        }
        else {
            printf "%d/Unknown bytes\r", $size;
        }
    },
);

# Buffered write errors only surface at close time, so check it.
my $closed    = close $wfh;
my $close_err = $closed ? '' : "$!";

print "\n", $res->status_line, "\n";

my $died = $res->header('X-Died');
my $ok   = $res->is_success && !$died && $closed;

unless ($ok) {
    warn "Download aborted: $died\n"       if $died;
    warn "$filename: close: $close_err\n" unless $closed;
    # Do not leave an empty or truncated file behind.
    unlink $filename;
}

# Report success or failure through the exit status.
exit( $ok ? 0 : 1 );
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment