Created
September 24, 2011 16:01
-
-
Save ishiduca/1239483 to your computer and use it in GitHub Desktop.
メロンブックス検索の結果を ハッシュリファレンスで返す
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package WWW::Search::Scrape::Melon; | |
use strict; | |
use Carp; | |
use utf8; | |
use Encode; | |
use LWP::UserAgent; | |
use URI::Escape; | |
use Web::Scraper; | |
use Data::Dumper; | |
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); | |
require Exporter; | |
our $VERSION = '0.01'; | |
our @ISA = qw(Exporter); | |
our @EXPORT = qw(search); | |
our @EXPORT_OK = qw(); | |
my $home = 'http://shop.melonbooks.co.jp'; | |
my $check_age = "${home}/shop/check_age.php"; | |
my $index_php = "${home}/shop/index.php"; | |
my $pass_checked = "${home}/shop/top/main"; | |
my $search = "${home}/shop/list/"; | |
sub search { | |
my %params = @_; | |
unless ($params{keyword}) { | |
Carp::carp qq(! failed: "keyword" parameter not found.); | |
return undef; | |
} | |
my $ua = LWP::UserAgent->new( cookie_jar => {} ); | |
push @{ $ua->requests_redirectable }, 'POST'; | |
my $res = $ua->get($check_age); | |
unless ($res->is_success) { | |
Carp::carp $res->status_line; | |
return undef; | |
} | |
if ($params{'_test'}) { | |
Carp::carp Dumper $res; | |
Carp::carp qq(-----------------------------------\n); | |
} | |
$res = $ua->post($index_php, | |
'Referer' => $check_age, | |
content => { 'LIVRET' => 'off', 'RATED' => '18' }, | |
); | |
unless ($res->is_success) { | |
Carp::carp $res->status_line; | |
return undef; | |
} | |
if ($params{'_test'}) { | |
Carp::carp Dumper $res; | |
Carp::carp qq(-----------------------------------\n); | |
} | |
my $query = uri_escape_utf8 $params{keyword}; | |
$res = $ua->get("${search}?DA=de&F=${query}&ST=0&SC=0&G=&E=ON&CR[]=18&CR[]=15&CR[]=0&O=maker&P=30&DS=desc", | |
'Referer' => $pass_checked, | |
); | |
unless ($res->is_success) { | |
Carp::carp $res->status_line; | |
return undef; | |
} | |
if ($params{'_test'}) { | |
Carp::carp Dumper $res; | |
Carp::carp qq(-----------------------------------\n); | |
} | |
_get_list($res->decoded_content); | |
} | |
sub _get_list { | |
my $html = shift; | |
my $scraper = scraper { | |
process '/html/body/table/tbody/tr[3]/td[2]/table/tbody/tr[2]/td/div/table/tr', 'lists[]' => scraper { | |
process '//td[1]/table[@class="list_desc_innertable_img"]/tr/td/div/a/img', 'urlOfThumbnail' => [ '@src', sub { $_->as_string } ]; | |
process '//td[2]/table/tr[1]/td/font', 'title' => 'TEXT'; | |
process '//td[2]/table/tr[2]/td/font/a', 'circle' => 'TEXT'; | |
process '//td[2]/table/tr[2]/td/font/a', 'urlOfCircle' => [ '@href', sub { $_->as_string } ]; | |
}; | |
}; | |
my $results = $scraper->scrape($html, 'http://shop.melonbooks.co.jp'); | |
return [ grep{ $_->{title} and $_ }@{$results->{lists}} ]; | |
} | |
1; | |
__END__ | |
=head1 NAME | |
WWW::Search::Scrape::Melon | |
=head1 SYNOPSIS | |
use WWW::Search::Scrape::Melon; | |
use utf8; | |
use JSON; | |
my $result = WWW::Search::Scrape::Melon::search( | |
keyword => '紅茶屋' | |
); | |
die qq(Dawn...) unless $result; | |
print encode_json $result; | |
=head1 DESCRIPTION | |
WWW::Search::Scrape::Melon provide a simple interface to get top search results from melonbooks.co.jp | |
and return a list of search results by hash reference. | |
=cut |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment