Skip to content

Instantly share code, notes, and snippets.

@leedo
Created August 13, 2012 19:55
Show Gist options
  • Save leedo/3343647 to your computer and use it in GitHub Desktop.
Save leedo/3343647 to your computer and use it in GitHub Desktop.
mirror files from old CDN
#!/usr/bin/env perl
# Print every http://media.arstechnica.com/ URL referenced by Movable Type
# entries, one URL per line on STDOUT.
#
# Both the entry body (entry_text) and the extended body (entry_text_more)
# are scanned; URLs from the body are printed before URLs from the extended
# body of the same row. Duplicates are not removed here.
use strict;
use warnings;
use DBI;

# RaiseError makes prepare/execute/fetch failures fatal instead of silently
# producing an empty or truncated URL list.
my $dbh = DBI->connect("DBI:mysql:database=x", "x", "x", { RaiseError => 1 })
    or die "cannot connect to database: $DBI::errstr\n";

# SQL LIKE pattern used only to pre-filter rows; the regex below does the
# actual URL extraction.
my $pattern = "%media.arstechnica.com%";
my $sth = $dbh->prepare("SELECT entry_text, entry_text_more FROM mt_entry WHERE entry_text LIKE ? OR entry_text_more LIKE ?");
$sth->execute($pattern, $pattern);

# A URL runs until the first whitespace, '<' or quote character.
my $url_re = qr{(http://media\.arstechnica\.com/[^\s<"']*)};

while (my ($text, $more) = $sth->fetchrow_array) {
    # Either column may be NULL (undef); scan only the ones that have content.
    # The original matched $text twice and never looked at $more.
    for my $field (grep { defined } $text, $more) {
        print "$_\n" for $field =~ /$url_re/g;
    }
}
#!/usr/bin/env perl
# Mirror files from the old CDN origin onto local disk.
#
# Reads URLs, one per line, from STDIN or from the files named on the command
# line. For each distinct path that does not already exist under $base, the
# file is fetched from http://origin.arstechnica.com/ and written to the same
# relative path below $base. Failures are reported with warn() and do not
# stop the remaining downloads.
use strict;
use warnings;
use AnyEvent::HTTP;
use AnyEvent::AIO;
use IO::AIO;
use IO::AIO::Util qw/aio_mkpath/;

# Per-path states stored in %urls.
use constant WORKING => 1;
use constant DONE    => 2;
use constant ERROR   => 3;

my %urls;    # relative path => WORKING | DONE | ERROR
my $base = "/var/www/origin.cdn.arstechnica.com";
my $cv   = AE::cv;

while (my $url = <>) {
    chomp $url;

    # Split into optional directory part and file name. The directory part is
    # optional so that files directly under the host root are mirrored too.
    my ($path, $file) = $url =~ m{http://[^/]+/(?:(.+)/)?([^/]+)\z};
    if (!defined $file) {
        warn "skipping unparseable line: $url\n";
        next;
    }

    my $fullpath = defined $path ? "$path/$file" : $file;
    my $dir      = defined $path ? "$base/$path" : $base;

    # The URL list is derived from stored content; never let a ".." segment
    # steer a write outside of $base.
    if (grep { $_ eq '..' } split m{/}, $fullpath) {
        warn "skipping path with '..' segment: $url\n";
        next;
    }

    # been processed
    next if exists $urls{$fullpath};

    # already existed
    if (-e "$base/$fullpath") {
        $urls{$fullpath} = DONE;
        next;
    }

    $urls{$fullpath} = WORKING;
    $cv->begin;

    # Common failure path: report, record the state, release the condvar.
    my $error = sub {
        warn "$fullpath $_[0]";
        $urls{$fullpath} = ERROR;
        $cv->end;
    };

    http_get "http://origin.arstechnica.com/$fullpath", sub {
        my ($body, $headers) = @_;

        if ($headers->{Status} != 200) {
            $error->("http error $headers->{Reason}");
            return;
        }

        aio_mkpath $dir, 0755, sub {
            # A true status is treated as failure.
            if ($_[0]) {
                $error->("error making path $!");
                return;
            }

            aio_open "$dir/$file", IO::AIO::O_WRONLY | IO::AIO::O_TRUNC | IO::AIO::O_CREAT, 0644, sub {
                my $fh = shift;
                if (!$fh) {
                    $error->("error opening file $!");
                    return;
                }

                aio_write $fh, 0, undef, $body, 0, sub {
                    my $written = shift;

                    # Do not mark the file DONE unless the whole body was
                    # actually written.
                    if (!defined $written || $written < 0) {
                        $error->("error writing file $!");
                        return;
                    }
                    if ($written != length $body) {
                        $error->("short write ($written of " . length($body) . " bytes)");
                        return;
                    }

                    # Close explicitly so errors deferred to close are seen.
                    aio_close $fh, sub {
                        if ($_[0]) {
                            $error->("error closing file $!");
                            return;
                        }
                        $urls{$fullpath} = DONE;
                        $cv->end;
                    };
                };
            };
        };
    };
}

# Block until every started download has finished or failed.
$cv->recv;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment