Created
September 20, 2011 15:45
-
-
Save ishiduca/1229457 to your computer and use it in GitHub Desktop.
COMIC ZIN検索の結果を ハッシュリファレンスで返す
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package WWW::Search::Scrape::Zin;

# Scrapes the search results page of the COMIC ZIN online shop.
use strict;
use warnings;
use utf8;
#use Encode;
use Carp;
use URI;
use URI::Escape;
use Web::Scraper;

# NOTE: `use vars` is redundant with the `our` declarations below; it is kept
# so that the set of loaded pragmas is unchanged.
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
require Exporter;

our $VERSION   = '0.01';
our @ISA       = qw(Exporter);
our @EXPORT    = qw(search);    # search() is exported by default
our @EXPORT_OK = qw();

# Base URL of the shop and the product-list endpoint that search() queries.
my $home   = 'http://shop.comiczin.jp';
my $search = "${home}/products/list.php";
# search(keyword => $text)
#
# Queries the shop's product-list page for $text and scrapes the result list.
#
# Named parameters:
#   keyword - search string; required, must be defined and non-empty.
#
# Returns whatever the scraper stored under 'list': a reference to an array
# of hashes with the keys urlOfTitle, urlOfThumbnail, title and circle.
# (Presumably undef when the page has no matching entries -- TODO confirm.)
# Returns undef, after a Carp::carp warning, when the keyword is missing or
# when scraping dies.
sub search {
    my %params = @_;

    # Test defined/length rather than truth so the keyword "0" is accepted.
    my $keyword = $params{'keyword'};
    unless (defined $keyword && length $keyword) {
        Carp::carp q(parameter "keyword" not found);
        # Explicit undef (not a bare return) so list-context callers still
        # receive a single undef, as before.
        return undef;
    }

    my $query = 'mode=search&name=' . uri_escape_utf8($keyword);
    my $uri   = "${search}?${query}";

    # Filter applied to the @href / @src values to turn them into strings.
    my $as_string = sub { return $_->as_string; };

    my $scraper = scraper {
        process '//form[@id="form1"]/ul/li/div/div[@class="data_area"]', 'list[]' => scraper {
            process '//a',     'urlOfTitle'     => [ '@href', $as_string ];
            process '//a/img', 'urlOfThumbnail' => [ '@src',  $as_string ];
            process '//a/img', 'title'          => '@alt';
            process '//p',     'circle'         => [ 'TEXT', sub {
                my @details = split /\s/, $_;
                pop @details;    # drop the edition
                pop @details;    # drop the price
                pop @details;    # drop the author
                return join(' ', @details);
            } ];
        };
    };

    # Use a success flag instead of testing $@ alone: $@ can be cleared
    # between the die and the check, which would hide the failure.
    my $res;
    my $ok = eval {
        $res = $scraper->scrape( URI->new($uri) );
        1;
    };
    unless ($ok) {
        my $error = $@ || 'unknown error';
        Carp::carp "! failed: $error";
        return undef;
    }

    return $res->{list};
}
1; | |
__END__ | |
=head1 NAME

WWW::Search::Scrape::Zin - scrape search results from the COMIC ZIN online shop

=head1 SYNOPSIS

    use WWW::Search::Scrape::Zin;
    use utf8;
    use JSON;

    my $result = WWW::Search::Scrape::Zin::search(
        keyword => '放課後プレイ'
    );
    die qq(Dawn...) unless $result;
    print encode_json $result;

=head1 DESCRIPTION

WWW::Search::Scrape::Zin provides a simple interface for fetching the top
search results from comiczin.jp. The results are returned as a reference to
an array of hashes, one hash per search result.

=cut
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment