Skip to content

Instantly share code, notes, and snippets.

@Cvetomird91
Last active July 12, 2017 09:55
Show Gist options
  • Save Cvetomird91/03545d96eb796e68f51987a66a593a18 to your computer and use it in GitHub Desktop.
Save Cvetomird91/03545d96eb796e68f51987a66a593a18 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use strict;
use warnings;
use Mojo::DOM;
use Mojo::UserAgent;
# Base URL of the directory index to mirror. Directory hrefs found on the page
# are appended to it verbatim, so it has to end with a slash.
my $root_url = 'http://index-of.co.uk/';

# Browser identity sent with the index request instead of the library default.
my $user_agent = 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5';

# Shared user agent; the response size limit is given in bytes (16 MiB). The
# same object is used further down for the file downloads.
my $ua = Mojo::UserAgent->new;
$ua = $ua->max_response_size(16777216);

# Fetch the root listing. The header has to be spelled "User-Agent"; a
# "UserAgent" key is sent as an unrelated header and the server never sees the
# intended identity. ->result already dies on connection errors; an HTTP error
# page is rejected as well, because it is not a directory listing.
my $root_result = $ua->get($root_url => {'User-Agent' => $user_agent})->result;
die "Cannot fetch $root_url: HTTP " . $root_result->code . "\n"
    unless $root_result->is_success;
my $res = $root_result->body;

my @dirnames = ();
my @book_urls = ();

# Drop every anchor that wraps an image so that only text links remain.
my $dom = Mojo::DOM->new($res);
$dom->find('a img')->each(sub {
    my ($icon) = @_;
    $icon->parent->remove;
});

# Collect the href of each remaining link inside the <pre> listing.
my $anchors = $dom->find('pre a');
for my $anchor (@$anchors) {
    push @dirnames, scalar $anchor->attr('href');
}

# Remove unnecessary anchors: the first four links of the listing are not
# directories to crawl (presumably header/navigation links -- TODO confirm
# against the live page layout).
splice @dirnames, 0, 4;
# Walk every directory collected from the root index, recreate it locally and
# download each listed file that does not exist on disk yet. A failure for one
# directory or one file is reported on STDERR and the crawl continues.

# One user agent for all listing requests; building a new one per directory
# gains nothing.
my $index_ua = Mojo::UserAgent->new;

DIRECTORY:
foreach my $dir (@dirnames) {
    # The href comes from a remote page and is used as a local path below, so
    # accept only a single relative path component followed by a slash.
    unless (defined $dir && $dir =~ m{\A[^/?\#]+/\z} && $dir !~ m{\A\.\.?/\z}) {
        warn 'Skipping unexpected directory link: ' . ($dir // '(no href)') . "\n";
        next DIRECTORY;
    }

    my $url = $root_url . $dir;

    # The header must be spelled "User-Agent" for the server to see it.
    # ->result dies on connection errors, hence the eval.
    my $listing = eval { $index_ua->get($url => {'User-Agent' => $user_agent})->result };
    if (!$listing) {
        warn "Cannot fetch $url: $@";
        next DIRECTORY;
    }
    if (!$listing->is_success) {
        warn "Cannot fetch $url: HTTP " . $listing->code . "\n";
        next DIRECTORY;
    }

    # Drop every anchor that wraps an image so that only text links remain.
    my $temp_dom = Mojo::DOM->new($listing->body);
    $temp_dom->find('a img')->each(sub {
        my ($icon) = @_;
        $icon->parent->remove;
    });

    # Remove unnecessary anchors: the first five links of a directory listing
    # are skipped (presumably header/parent links -- TODO confirm against the
    # live page layout).
    my $temp_anchors = $temp_dom->find('pre a');
    splice @$temp_anchors, 0, 5;

    unless (-d $dir) {
        unless (mkdir($dir, 0700)) {
            warn "Cannot create directory $dir: $!\n";
            next DIRECTORY;
        }
    }

    BOOK:
    for my $anchor (@$temp_anchors) {
        my $book_link = $anchor->attr('href');

        # Only plain file names are downloadable: sub-directories, query
        # links and anything that could escape $dir are skipped.
        unless (defined $book_link
            && $book_link =~ m{\A[^/?\#]+\z}
            && $book_link !~ m{\A\.\.?\z})
        {
            warn 'Skipping unexpected file link: ' . ($book_link // '(no href)') . "\n";
            next BOOK;
        }

        my $book_url = $url . $book_link;
        my $bookfile = $dir . $book_link;
        push @book_urls, $book_url;
        print "$book_url\n";
        print "$bookfile\n";

        # Files fetched by an earlier run are not downloaded again.
        next BOOK if -e $bookfile;

        # Downloads go through the shared $ua, so its configured response
        # size limit applies; an oversized or failed transfer is skipped
        # instead of aborting the whole run.
        my $download = eval { $ua->get($book_url => {'User-Agent' => $user_agent})->result };
        if (!$download) {
            warn "Cannot download $book_url: $@";
            next BOOK;
        }
        if (!$download->is_success) {
            # Do not store an error page under the book's file name.
            warn "Cannot download $book_url: HTTP " . $download->code . "\n";
            next BOOK;
        }

        eval { $download->content->asset->move_to($bookfile); 1 }
            or warn "Cannot save $bookfile: $@";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment