Created
March 9, 2016 07:14
-
-
Save ilyaevseev/015406e76e464b80837c to your computer and use it in GitHub Desktop.
URLs validator, uses Perl and AnyEvent
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
#
# URL validator built on AnyEvent::HTTP.
#
# Usage:
#   <script> FILE [FILE...]    read URLs from the named files
#   <script> -                 read URLs from standard input
#
# Input holds one URL per line.  Blank lines and lines whose first
# non-blank character is '#' are ignored.  All URLs are requested
# concurrently; progress messages go to STDERR, and the URLs that answered
# with a status starting with 2 or 3 are printed to STDOUT, sorted, one
# per line.

use strict;
use warnings;

use AnyEvent::HTTP;

# Read URLs from an open filehandle.
#
# Skips blank and comment lines, and strips surrounding whitespace
# (including the "\r" left behind by CRLF line endings) from every
# remaining line.  Returns the list of URLs in input order.
sub read_urls {
    my ($fh) = @_;

    my @found;
    while (my $line = <$fh>) {
        next if $line =~ /^\s*(?:#|$)/;    # comment or blank line
        $line =~ s/^\s+|\s+$//g;
        push @found, $line;
    }
    return @found;
}

# URL => HTTP status.  -1 means "no answer yet"; the hash also
# de-duplicates URLs that appear more than once in the input.
my %urls;

foreach my $source (@ARGV) {
    if ($source eq '-') {
        $urls{$_} = -1 foreach read_urls(\*STDIN);
        next;
    }

    # Three-argument open: a file name can never be taken as a mode or pipe.
    open my $fh, '<', $source or die "Cannot open $source: $!\n";
    $urls{$_} = -1 foreach read_urls($fh);
    close $fh;
}

die "Empty URLs\n" unless %urls;

my $cv = AnyEvent->condvar;

# begin/end bookkeeping: the condvar is signalled once every begin() has
# been matched by an end().  The outer pair keeps it from firing while
# requests are still being queued.
$cv->begin;

foreach my $url (keys %urls) {
    print STDERR "Starting $url\n";

    $cv->begin;
    http_get $url,
        recurse => 0,    # do not follow redirects; a 3xx answer counts as valid
        sub {
            my ($body, $headers) = @_;

            $urls{$url} = $headers->{Status};

            # The URL is passed as an argument, never interpolated into the
            # format string: URLs routinely contain '%' (percent-encoding).
            printf STDERR "Finished %s, status = %s\n", $url, $urls{$url};

            $cv->end;
        };
}

$cv->end;
$cv->recv;    # run the event loop until every request has finished

print "$_\n" foreach sort grep { $urls{$_} =~ /^[23]/ } keys %urls;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment