Created
January 23, 2013 10:14
-
-
Save ksurent/4603988 to your computer and use it in GitHub Desktop.
AE::HTTP + AE::AIO combo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use v5.14; | |
use List::Util qw(shuffle); | |
use Parse::CPAN::Packages::Fast; | |
use List::MoreUtils qw(uniq any); | |
use AE; | |
use IO::AIO; | |
use AnyEvent::AIO; | |
use AnyEvent::HTTP; | |
my $index = Parse::CPAN::Packages::Fast->new('02packages.details.txt.gz'); | |
my @distributions = shuffle | |
uniq | |
map $index->package($_)->distribution->dist, | |
$index->packages; | |
@distributions = @distributions[0 .. 999]; | |
my @sections = qw(NAME ABSTRACT DESCRIPTION); | |
my $cv = AE::cv; | |
my $ctrl_c = AE::signal INT => sub { | |
$cv->send; | |
}; | |
sub do_next() { | |
return add_request(shift @distributions) if @distributions; | |
return $cv->send; | |
} | |
do_next for 1 .. 10; | |
$cv->recv; | |
sub add_request { | |
my $dist = shift; | |
my $dist_colon = $dist =~ s/-/::/gr; | |
http_get "http://api.metacpan.org/pod/$dist_colon?content-type=text/plain", sub { | |
my($body, $hdr) = @_; | |
return do_next unless $hdr->{Status} eq '200'; | |
my $relevant = extract_relevant($body); | |
return do_next if not defined $relevant or not length $relevant; | |
aio_open "downloaded/$dist.txt", IO::AIO::O_WRONLY | IO::AIO::O_CREAT, 0644, sub { | |
my $fh = shift or $cv->croak("aio_open(): $!"); | |
my $wrt; | |
$wrt = sub { | |
my($data, $data_offset, $cb) = @_; | |
use bytes; | |
aio_write $fh, undef, length $data, $data, $data_offset, sub { | |
my $written = shift; | |
$cv->croak("aio_write(): $!") if $written < 0; | |
$data_offset += $written; | |
if($data_offset < length $data) { | |
$wrt->($data, $data_offset, $cb); | |
} | |
else { | |
undef $wrt; | |
$cb->(); | |
} | |
}; | |
}; | |
$wrt->( | |
$relevant, | |
0, | |
sub { | |
aio_close $fh, sub { | |
do_next; | |
} | |
}, | |
); | |
}; | |
do_next; | |
}; | |
} | |
sub extract_relevant { | |
my $body = shift; | |
my $relevant; | |
open my $fh, '<', \$body; | |
my $in_section = 0; | |
while(<$fh>) { | |
if(/^([A-Z]+(?:\s+[A-Z])*)$/) { | |
$in_section = $in_section ? 0 : $1; | |
} | |
elsif($in_section and any { $in_section eq $_ } @sections) { | |
$relevant .= $_ . ' '; | |
} | |
} | |
close $fh; | |
$relevant; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment