Created
May 18, 2014 14:12
-
-
Save mtsukamoto/bebe76f5ee740b41e23f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use utf8; | |
use WWW::Mechanize; | |
use Encode qw(encode decode); | |
my $auth = { | |
email => $ENV{'BOOKSCAN_MAIL'}, | |
password => $ENV{'BOOKSCAN_PASS'}, | |
}; | |
my $mech = WWW::Mechanize->new; | |
$mech->env_proxy(); | |
my $url = 'https://system.bookscan.co.jp/history.php'; | |
$mech->get( $url ); | |
$mech->submit_form( fields => $auth ); | |
my @details = &parse_history($mech); | |
foreach my $detail (@details) { | |
printf("[%s] %s\n", $detail->{'date'}, $detail->{'url'}); | |
$mech->get($detail->{'url'}); | |
my ($number, @pdfs) = &parse_detail($mech); | |
printf("[number:%s]\n", $number); | |
my $directory = sprintf("%s_%s", $number, $detail->{'date'}); | |
printf("[directory:%s]\n", $directory); | |
if (not -d $directory) { | |
print "mkdir($directory)\n"; | |
mkdir($directory, 0777); | |
} | |
foreach my $pdf (@pdfs) { | |
my $url = $pdf->{'url'}; | |
my $file = sprintf("%s/%s", $directory, $pdf->{'title'}); | |
# $file = encode('cp932', decode('UTF-8', $file)); | |
$file = encode('cp932', $file); | |
next if (-e $file); | |
printf("%s\n->$file\n", $url); | |
$mech->get($url, ':content_file' => $file); | |
print "\n"; | |
} | |
} | |
sub parse_history { | |
my $mech = shift; | |
my $content = $mech->content; | |
my @resutls = (); | |
my @rows = ($content =~ /<tr[^<>]*>(.*?)<\/tr>/gs); | |
foreach my $row (@rows) { | |
next unless ($row =~ /<td width=100>.*?<a href='(\Qbookdetail.php?hash=\E[^']+)'>/s); | |
my $result = { 'url' => 'https://system.bookscan.co.jp/' . $1 }; | |
if ($row =~ /<td width=130 style="width:117px">((\d+)\D+(\d+)\D+(\d+)\D+)<\/td>/s) { | |
my ($string, $year, $month, $day) = ($1, $2, $3, $4); | |
$result->{'date'} = sprintf('%04d.%02d.%02d', $year, $month, $day); | |
$result->{'datestring'} = $string; | |
} | |
push(@resutls, $result); | |
} | |
return @resutls; | |
} | |
sub parse_detail { | |
my $mech = shift; | |
my $content = $mech->content; | |
my @resutls = (); | |
my @links = ($content =~ /<a href="\Qdownload.php?\E.+?\.pdf" class="downloading">[^<>]+?<\/a>/g); | |
my $number = $1 if ($content =~ /<input type="hidden" name="d" value="(.*?)">/); | |
foreach my $link (@links) { | |
next unless ($link =~ /<a href="(\Qdownload.php?\E.+?\.pdf)" class="downloading">([^<>]+?)<\/a>/); | |
my $result = { | |
'url' => 'https://system.bookscan.co.jp/' . $1, | |
'title' => $2 | |
}; | |
push(@resutls, $result); | |
} | |
return ($number, @resutls); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Windows+Perl環境用の、Bookscanの依頼済み書籍のPDFをダウンロードするためのスクリプトです。