Created
May 5, 2012 07:49
-
-
Save a-square/2600751 to your computer and use it in GitHub Desktop.
A blip.tv downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
########################################################################## | |
# example: perl get-blip.pl http://spoonyexperiment.com/.../ | |
# | |
# if it doesn't work, make sure you copied the url from the addres bar, | |
# not a link to avoid getting a redirection page | |
########################################################################## | |
use strict; | |
use URI; | |
use URI::Escape; | |
use IO::Socket; | |
use LWP::Simple; | |
# getlow(url) | |
# low-level HTTP GET that emulates a typical Google Chrome request | |
sub getlow { | |
# extract the host and path parts from the url | |
my $uri = URI->new(shift); | |
$uri =~ /http:\/\/(.+?)\//; | |
my $host = $1; | |
my $path = $uri->path; | |
# form a request (by hand to trick some overly diligent anti-spam systems) | |
my $request = <<END; | |
GET $path HTTP/1.1 | |
Host: $host | |
Connection: keep-alive | |
Cache-Control: max-age=0 | |
Accept: application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5 | |
User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.634.0 Safari/534.16 | |
Accept-Language: en-US,en;q=0.8 | |
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3 | |
END | |
# follow the HTTP line ending convention | |
$request =~ s/\n/\r\n/g; | |
# add an empty line at the end to signify the end of the headers section | |
$request .= "\r\n"; | |
# connect to the host | |
my $sock = IO::Socket::INET->new ( | |
PeerAddr => $host, | |
PeerPort => '80', | |
Proto => 'tcp', | |
) or die "Couldn't create a socket: $!\n"; | |
# send the request | |
print $sock $request; | |
# get the result piece by piece (as required by TCP) | |
my $result = ''; | |
while (<$sock>) { | |
$result .= $_; | |
} | |
# close the connection | |
close $sock; | |
return $result; | |
} | |
# save(url) | |
# uses LWP to mirror the file by its original filename in the current dir | |
sub save { | |
my $url = shift; | |
$url =~ /^.+\/(.+)$/; | |
my $fname = $1; | |
mirror($url, $fname); | |
} | |
############################################################################## | |
# the main routine | |
############################################################################## | |
# get the page from the supplied url and find all the embeddings | |
my $r = getlow(shift); | |
my @videos = ($r =~ /<embed[^>]+src="(.+?)"/g); | |
foreach (@videos) { | |
# make sure to only download from blip | |
if ($_ =~ /blip.tv/) { | |
my $url = $_; | |
print $url, "\n"; | |
# there are two types of blip urls, convert to the nicer one | |
if ($url =~ /http:\/\/a.blip.tv\/api.swf\#(.+)$/) { | |
$url = "http://blip.tv/play/$1"; | |
print ">> $url", "\n"; | |
} | |
# get that url (no reason for using getlow here, I'm just lazy) | |
my $vr = getlow($url); | |
# peel yet another layer of indirection to get an XML document | |
# that describes what resources should the blip.tv player | |
# choose from | |
$vr =~ /file=([^&\r]+)/; | |
$vr = getlow(uri_unescape($1)); | |
# download all the files that have movie extensions | |
# a factoid: recently Spoony started including mp3 versions of | |
# his reviews, which might be handy when dling his vlog entries | |
my @files = ($vr =~ /url="([^\"]+\.(?:mp4|m4v|mov|flv|mkv))"/); | |
foreach (@files) { | |
print '---', $_, "\n"; | |
save($_); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment