Last active
July 12, 2017 09:55
-
-
Save Cvetomird91/03545d96eb796e68f51987a66a593a18 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
#
# Mirror the files linked from a two-level "index of" style site.
#
# The script reads the root listing, treats every entry found there as a
# directory, creates a local directory with the same name, and downloads each
# file linked from that directory's listing unless a local copy already
# exists.  Every URL and local path is printed as it is processed.
#
# A failure on one directory or one file is reported with warn() and the
# crawl continues; only a failure to read the root listing is fatal.
use strict;
use warnings;
use Mojo::DOM;
use Mojo::UserAgent;

my $root_url   = 'http://index-of.co.uk/';
my $user_agent = 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5';

# Number of leading <pre> anchors that are not real entries in each listing.
# NOTE(review): presumably the column-sort and parent-directory links --
# verify against the live pages.
my $root_skip = 4;
my $dir_skip  = 5;

# A single user agent is shared by every request.  Each response is capped
# at 16 MiB.
# NOTE(review): files larger than the cap will presumably fail to download;
# confirm the limit is intended for the book downloads too.
my $ua = Mojo::UserAgent->new;
$ua = $ua->max_response_size(16777216);

# Every book URL seen during the crawl, in processing order.
my @book_urls = ();

# Return true when $href is a plain relative reference that is safe to use
# both as a URL suffix and as a local path: no scheme, no leading slash, no
# query/fragment, and no ".." path segment.  The hrefs come from remote HTML,
# so they must not be allowed to point outside the working directory.
sub is_safe_relative {
    my ($href) = @_;

    return 0 if $href =~ m{\A/};
    return 0 if $href =~ m{\A[A-Za-z][A-Za-z0-9+.-]*:};
    return 0 if $href =~ m{[?#]};
    return 0 if grep { $_ eq '..' } split m{/}, $href;
    return 1;
}

# Fetch the listing at $url and return the href of every anchor inside <pre>,
# after discarding the first $skip anchors.  Anchors without an href are
# dropped.  Dies if the request fails or the status is not 2xx.
sub listing_hrefs {
    my ($url, $skip) = @_;

    my $res = $ua->get($url => {'User-Agent' => $user_agent})->result;
    die "GET $url returned HTTP status " . $res->code . "\n"
        unless $res->is_success;

    my $dom = Mojo::DOM->new($res->body);

    # Remove the parent element of every <img> nested in an anchor so that
    # icon links are not collected as entries.
    $dom->find('a img')->each(sub {
        my ($img) = @_;
        $img->parent->remove;
    });

    my @anchors = @{ $dom->find('pre a') };
    splice @anchors, 0, $skip;

    return grep { defined($_) && length($_) }
           map  { $_->attr('href') } @anchors;
}

# Download $book_url into $bookfile.  Dies if the request fails or the status
# is not 2xx, so that an error page is never stored as if it were the file.
sub download_book {
    my ($book_url, $bookfile) = @_;

    my $res = $ua->get($book_url => {'User-Agent' => $user_agent})->result;
    die "GET $book_url returned HTTP status " . $res->code . "\n"
        unless $res->is_success;

    $res->content->asset->move_to($bookfile);
    return;
}

# The root listing is required; an error here is allowed to end the script.
my @dirnames = listing_hrefs($root_url, $root_skip);

DIR:
for my $dir (@dirnames) {
    unless (is_safe_relative($dir)) {
        warn "Skipping unexpected directory link '$dir'\n";
        next DIR;
    }

    my $url = $root_url . $dir;

    my @book_links;
    my $listed = eval { @book_links = listing_hrefs($url, $dir_skip); 1 };
    unless ($listed) {
        my $err = $@ || "unknown error\n";
        warn "Skipping $url: $err";
        next DIR;
    }

    if (!-d $dir && !mkdir($dir, 0700)) {
        warn "Cannot create directory '$dir': $!\n";
        next DIR;
    }

    BOOK:
    for my $book_link (@book_links) {
        # A trailing slash marks a sub-directory, which cannot be saved as
        # a file; anything unsafe is ignored for the same reason as above.
        if ($book_link =~ m{/\z} || !is_safe_relative($book_link)) {
            warn "Skipping unexpected file link '$book_link' in $url\n";
            next BOOK;
        }

        my $book_url = $url . $book_link;
        my $bookfile = $dir . $book_link;

        push @book_urls, $book_url;
        print($book_url."\n");
        print($bookfile."\n");

        # Keep whatever is already on disk; only missing files are fetched.
        next BOOK if -e $bookfile;

        my $saved = eval { download_book($book_url, $bookfile); 1 };
        unless ($saved) {
            my $err = $@ || "unknown error\n";
            warn "Failed to download $book_url: $err";
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment