Skip to content

Instantly share code, notes, and snippets.

@vti
Created July 4, 2012 16:00
Show Gist options
  • Save vti/3048049 to your computer and use it in GitHub Desktop.
Save vti/3048049 to your computer and use it in GitHub Desktop.
Books publications
#!/usr/bin/env perl
use strict;
use warnings;
use URI;
use URI::Escape;
use Digest::SHA qw(hmac_sha256_base64);
use LWP::UserAgent;
use XML::LibXML;
# --- Configuration ---------------------------------------------------------
# Credentials for the AWSECommerceService endpoint. They ship empty and must
# be filled in before running: $ASSOCIATE_TAG is sent as AssociateTag, $TOKEN
# as AWSAccessKeyId, and $SECRET_KEY is the key used to sign every request.
my $ASSOCIATE_TAG = '';
my $TOKEN         = '';
my $SECRET_KEY    = '';

# API version. It is sent with every request and is also the last path
# segment of the XML namespace used to query the responses.
my $VERSION = '2011-08-01';
my $NS      = "http://webservices.amazon.com/AWSECommerceService/$VERSION";

# Endpoint every request is built from (see build_uri).
my $BASE_URI = URI->new(
    'http://webservices.amazon.com/onca/xml?Service=AWSECommerceService');

# The subject to search for is the first command-line argument. The trailing
# newline keeps Perl from appending "at <file> line <n>." to the usage text.
my ($subject) = @ARGV or die "Usage: $0 <subject>\n";

# Fail early with an actionable message instead of sending a request that
# cannot be authenticated.
die "Set \$TOKEN and \$SECRET_KEY at the top of $0 before running\n"
    unless length($TOKEN) and length($SECRET_KEY);
# Main loop: for every publication year, page through the ItemSearch results
# and print each matching book on STDOUT as
#   PublicationDate;Title;Edition;
# Progress ("year: page/total") goes to STDERR via warn.

# The subject comes straight from @ARGV, so quote it: characters such as "+"
# (think "c++") must match literally rather than be parsed as regex syntax.
my $subject_re = qr/\Q$subject\E/i;

for my $year (2002 .. 2012) {
    my $page      = 1;
    my $last_page = 10;    # provisional; replaced by TotalPages below

    while ($page <= $last_page) {
        my $uri = build_uri(
            $BASE_URI, $SECRET_KEY,
            ResponseGroup => 'Medium',
            Operation     => 'ItemSearch',
            SearchIndex   => 'Books',
            Sort          => 'daterank',
            Power         => "subject:$subject and subject:programming "
              . "and keyword:$subject and pubdate: $year "
              . "and language: English and not fiction",
            ItemPage => $page
        );

        my $result = fetch_uri($uri);
        my $dom    = XML::LibXML->load_xml(string => $result);
        my $xc     = XML::LibXML::XPathContext->new($dom->documentElement());
        $xc->registerNs('ns', $NS);

        # Without TotalPages the paging below cannot work. Report that
        # explicitly, quoting an Error/Message element if the document has
        # one (matched by local name so its namespace does not matter).
        my ($total_node) = $xc->findnodes('//ns:TotalPages');
        if (!defined $total_node) {
            my $reason = $xc->findvalue(
                '//*[local-name()="Error"]/*[local-name()="Message"]');
            die "$year: page $page: response has no TotalPages"
              . (length($reason) ? ": $reason" : '') . "\n";
        }

        my $total_pages = $total_node->textContent;

        # More than 10 pages is treated as fatal: the query has to be
        # narrowed by hand.
        if ($total_pages > 10) {
            die "TotalPage = $total_pages";
        }
        warn "$year: $page/$total_pages";
        $last_page = $total_pages;

        for my $attributes ($xc->findnodes('//ns:ItemAttributes')) {

            # Flatten the children into name => text. A name that occurs
            # more than once keeps only its last value.
            my %book;
            $book{ $_->nodeName } = $_->textContent
                for $attributes->childNodes;

            # Not every item carries every attribute; default the ones used
            # below so the matches do not warn about undefined values.
            my $title   = defined $book{Title}   ? $book{Title}   : '';
            my $binding = defined $book{Binding} ? $book{Binding} : '';
            my $pubdate =
                defined $book{PublicationDate} ? $book{PublicationDate} : '';

            next unless $title   =~ $subject_re;
            next unless $binding =~ m/(?:paperback|hardcover)/i;
            next if     $title   =~ m/catalyst|rails|sinatra|django/i;

            # "\n" is the last joined element, so every record ends in ";".
            print join ';', $pubdate, $title, ($book{Edition} || ''), "\n";
        }

        $page++;
        sleep 1;    # pause between consecutive requests
    }
}
# Perform an HTTP GET on $uri with a freshly created LWP::UserAgent.
#
# Returns the decoded content of the response.
# Dies with the response's status line when the request is not successful.
sub fetch_uri {
    my ($uri) = @_;

    my $reply = LWP::UserAgent->new()->get($uri);

    # Guard clause: anything but a successful response is fatal.
    die $reply->status_line unless $reply->is_success;

    return $reply->decoded_content;
}
# Build a signed request URI.
#
# Arguments:
#   $uri    - URI object naming the endpoint. It is cloned, never modified,
#             so a shared base URI can be reused for every request.
#   $secret - secret key, passed on to sign_request().
#   %params - operation-specific query parameters (name => value).
#
# Returns the URI object produced by sign_request().
sub build_uri {
    my ($uri, $secret, %params) = @_;

    # Work on a copy: query_form() below and sign_request() both rewrite the
    # query of the object they are given.
    my $request = $uri->clone;

    # query_form() replaces the whole query string. The fixed parameters
    # come first, followed by the caller's in sorted order.
    $request->query_form(
        'Service'        => 'AWSECommerceService',
        'AWSAccessKeyId' => $TOKEN,
        'Version'        => $VERSION,
        'AssociateTag'   => $ASSOCIATE_TAG,
        (map { ($_ => $params{$_}) } sort keys %params),
    );

    return sign_request($request, $secret);
}
# Taken from Net::Amazon
#
# Sign a request URI in place.
#
# Arguments:
#   $uri    - URI object whose query holds the request parameters.
#   $secret - key for the HMAC-SHA256 signature. When it is not defined the
#             file-level $SECRET_KEY is used instead.
#
# Adds a Timestamp parameter (current UTC time) unless one is already set,
# rewrites the query with the parameters sorted by name and percent-encoded,
# appends the Signature parameter, and returns the same $uri object.
sub sign_request {
    my ($uri, $secret) = @_;

    # Honour the key the caller passed; fall back to the global only when
    # none was given.
    $secret = $SECRET_KEY unless defined $secret;

    # Everything except the RFC 3986 unreserved characters gets escaped.
    # Single quotes keep the backslash, so "-" is an escaped literal inside
    # the character class that uri_escape() builds from this string.
    my $unreserved = '^A-Za-z0-9\-_.~';

    # This assumes no duplicated keys. Safe assumption?
    my %query = $uri->query_form;

    my @now = gmtime;
    $query{Timestamp} ||= sprintf(
        '%04d-%02d-%02dT%02d:%02d:%02dZ',
        $now[5] + 1900,
        $now[4] + 1,
        @now[3, 2, 1, 0]
    );

    # Names are escaped as well as values, so a parameter name containing a
    # reserved character cannot corrupt the string that gets signed.
    my $qstring = join '&',
        map {
            uri_escape($_, $unreserved) . '='
              . uri_escape($query{$_}, $unreserved)
        }
        sort keys %query;

    # Use chr(10), not "\n" which varies by platform
    my $signme = join chr(10), "GET", $uri->host, $uri->path, $qstring;
    my $sig = hmac_sha256_base64($signme, $secret);

    # Digest does not properly pad b64 strings
    $sig .= '=' while length($sig) % 4;

    $sig = uri_escape($sig, $unreserved);
    $qstring .= "&Signature=$sig";
    $uri->query($qstring);

    return $uri;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment