Created
September 24, 2011 07:27
-
-
Save ishiduca/1239076 to your computer and use it in GitHub Desktop.
COMIC ZIN検索の結果を ハッシュリファレンスで返す based AnyEvent
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package AnyEvent::Search::Scrape::Zin;
use strict;
use warnings;
use utf8;
use Carp;
use Encode;
use AnyEvent;
use AnyEvent::HTTP;
# Loaded explicitly because new() calls AnyEvent::Util::guard by its full name;
# nothing is imported into this namespace.
use AnyEvent::Util ();
use URI::Escape;
use Web::Scraper;

our $VERSION = '0.01';

# Base URL of the shop; also handed to the scraper as the base for the page.
my $home = 'http://shop.comiczin.jp';
# Product search endpoint that new() appends its query string to.
my $search = "${home}/products/list.php";
# Create a search client and immediately start the asynchronous request.
#
# Named arguments:
#   keyword     - search phrase (required); escaped as UTF-8 into the query.
#   callback    - code ref (required); called once with the value returned by
#                 _get_list() for the downloaded page.
#   headers     - optional value passed through to http_get as "headers".
#   on_error    - optional code ref called with a message string when the
#                 server answers with a non-200 status or no body could be
#                 fetched; the default handler dies with that message.
#   max_retries - optional number of times an empty response is re-requested
#                 before giving up (default 3).
#
# Returns the blessed client object. Destroying the object stops any further
# retries; it does not cancel a request that is already in flight.
#
# Croaks when "keyword" or "callback" is missing.
sub new {
    my ($class, %args) = @_;

    $args{keyword}  || Carp::croak qq(! failed: "keyword" parameter not found.);
    $args{callback} || Carp::croak qq(! failed: "callback" parameter not found.);

    my $uri = join '', $search, '?mode=search&name=', uri_escape_utf8($args{keyword});
    my $on_error     = $args{on_error} || sub { die @_; };
    my $retries_left = defined $args{max_retries} ? $args{max_retries} : 3;

    my $self = bless {}, $class;

    my $request;
    $request = sub {
        # Set when on_header refuses the response, so the completion callback
        # (which still runs, with an empty body) neither retries nor reports
        # the same failure twice.
        my $rejected = 0;

        http_get $uri,
            headers   => $args{headers},
            on_header => sub {
                my $hdrs = shift;
                if ($hdrs->{Status} ne '200') {
                    $rejected = 1;
                    $on_error->("$uri: $hdrs->{Status} $hdrs->{Reason}");
                    return;    # false value aborts the download
                }
                return 1;
            },
            sub {
                my ($body, $headers) = @_;

                return if $rejected;

                unless ($body) {
                    # $request is undefined once the client object is gone.
                    if ($request && $retries_left-- > 0) {
                        # Call in void context: in any other context http_get
                        # returns a cancellation guard that would be dropped
                        # at once and cancel the new request.
                        $request->();
                        return;
                    }
                    $on_error->("$uri: empty response ($headers->{Status} $headers->{Reason})");
                    return;
                }

                my $res = _get_list(decode_utf8 $body);
                ($args{callback})->($res);
                return;
            };
    };

    # $request refers to itself through its closure; clearing it when the
    # object is destroyed breaks that reference cycle.
    $self->{guard} = AnyEvent::Util::guard { undef $request; };

    $request->();

    return $self;
}
# Scrape a decoded search-result page into a list of product records.
#
# Takes the page HTML as a string. Each element matched by the "data_area"
# XPath yields one hash with the keys urlOfTitle, urlOfThumbnail, title and
# circle. Returns the scraped "list" value, or undef (after a carp) when the
# scraper throws.
sub _get_list {
    my ($page) = @_;

    # Filters operate on the extracted value in $_.
    my $stringify = sub { return $_->as_string; };
    my $circle_of = sub {
        my @fields = split /\s/, $_;
        # Drop the last three fields: edition, price and author.
        pop @fields for 1 .. 3;
        return join(' ', @fields);
    };

    my $page_scraper = scraper {
        process '//form[@id="form1"]/ul/li/div/div[@class="data_area"]', 'list[]' => scraper {
            process '//a',     'urlOfTitle'     => [ '@href', $stringify ];
            process '//a/img', 'urlOfThumbnail' => [ '@src',  $stringify ];
            process '//a/img', 'title'          => '@alt';
            process '//p',     'circle'         => [ 'TEXT',  $circle_of ];
        };
    };

    my $scraped = eval { $page_scraper->scrape($page, $home) };
    if ($@) {
        Carp::carp qq(! failed: $@);
        return undef;
    }

    return $scraped->{list};
}
1; | |
__END__ | |
=head1 NAME | |
AnyEvent::Search::Scrape::Zin - Comic ZIN Search interface for AnyEvent-based programs | |
=head1 SYNOPSIS | |
use utf8; | |
use AnyEvent::Search::Scrape::Zin; | |
use JSON; | |
my $cv = AnyEvent->condvar; | |
my $client = AnyEvent::Search::Scrape::Zin->new( | |
keyword => 'Ash wing', | |
callback => sub { | |
my $response = shift; | |
print encode_json $response; | |
$cv->send(); | |
}, | |
); | |
$cv->recv(); | |
=head1 DESCRIPTION
AnyEvent::Search::Scrape::Zin provides a simple interface for AnyEvent-based programs.
It gets the top search results from shop.comiczin.jp and passes them to the callback as a reference to an array of hashes.
=cut |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment