Created
May 21, 2012 18:56
-
-
Save michalfapso/2763955 to your computer and use it in GitHub Desktop.
emailAddressVerifier.pl: Script for verification of a list of email addresses
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use Email::Valid; | |
use WWW::Mechanize; | |
use LWP::UserAgent; | |
#use Thread qw/async yield/; | |
use threads; | |
use threads::shared; | |
# Upper bound on how many checker threads may be registered in %threads at once;
# the main loop sleeps while this many entries are pending.
$MAX_THREADS_COUNT = 20;
# Registry shared between the main thread and the consumer thread:
# email address => thread object running checkaddress() for that address.
my %threads :shared;
# Shared flag polled by consumethreads(); the main thread sets it to 1 once all
# input has been dispatched and every registered thread has been consumed.
my $finished :shared = 0;
# consumethreads: body of the consumer thread.
#
# Until the shared $finished flag becomes true, repeatedly sweeps the shared
# %threads hash (email address => thread object). For every entry it joins the
# thread, writes "<email> <result>" to STDERR and to the selected output handle,
# and then removes the entry from the hash. It sleeps one second between sweeps
# and prints a final notice to STDERR when the loop ends.
# Takes no arguments.
sub consumethreads
{
    until ($finished)
    {
        foreach my $address (keys %threads)
        {
            # join() blocks until this checker thread has returned its verdict.
            $res = $threads{$address}->join();
            my $report = "$address $res\n";
            print STDERR $report;
            # Turn on autoflush for the currently selected output handle
            # (STDOUT unless select() was called elsewhere) before writing.
            $| = 1;
            print $report;
            # Dropping the entry frees a slot for the main loop's thread limit.
            delete $threads{$address};
        }
        sleep 1;
    }
    print STDERR "consumethreads FINISHED\n";
}
# checkaddress($email)
#
# Submits $email through the third form on the online checker page and
# classifies the page that comes back.
#
# Parameters:
#   $email - the address to verify (first positional argument).
#
# Returns one of these strings:
#   "OK"                       - the reply page does not contain the phrase
#                                "is not ... a valid deliverable e-mail box address"
#   "BAD"                      - the reply page contains that phrase
#   "HTTP COMMUNICATION ERROR" - the page or its third form could not be
#                                obtained, the form submission failed, or the
#                                HTTP layer threw an exception
#
# Never dies: all exceptions are trapped so the calling thread always yields a
# verdict string for join().
sub checkaddress
{
    my $email = shift;
    my $url = "http://www.ipaddresslocation.org/email_lookup/check-email.php";

    # WWW::Mechanize may die on a failed request (autocheck) and HTML::Form
    # dies on a missing input; trap both so the worker reports an error
    # instead of terminating with no result.
    my $verdict = eval {
        # The agent string is passed to the constructor, so no separate
        # agent() call is needed.
        my $br = WWW::Mechanize->new(agent => 'Mozilla/5.0');
        my $response = $br->get($url);
        my @forms = $br->forms();
        if (scalar(@forms) != 3)
        {
            # Unexpected page layout: wait briefly and fetch the page once more.
            sleep 2;
            $response = $br->get($url);
        }
        my $form = $br->form_number(3);
        return "HTTP COMMUNICATION ERROR" if !$form;

        $form->value("email" => "$email");
        $response = $br->request($form->click);
        # With autocheck disabled a failed POST would otherwise fall through
        # to the regex below and be misreported as "OK".
        return "HTTP COMMUNICATION ERROR" if !$response->is_success;

        my $html = $response->decoded_content;
        return "HTTP COMMUNICATION ERROR" if !defined $html;

        return ($html !~ /is not.*a valid deliverable e-mail box address/)
            ? "OK"
            : "BAD";
    };

    # eval yields undef when the block died.
    return defined $verdict ? $verdict : "HTTP COMMUNICATION ERROR";
}
# Main driver.
#
# Starts the consumer thread, then reads one email address per line from STDIN.
# Addresses that fail Email::Valid or contain whitespace are reported on STDOUT
# as "<address> INVALID ADDRESS FORMAT"; every other address gets its own
# checkaddress() thread, registered in the shared %threads hash for the
# consumer to join. After the input is exhausted it waits for the registry to
# drain, raises $finished so the consumer stops, and joins the consumer.
my $i = 0;
my $t_consumer = threads->new(\&consumethreads);
my $thread;
while (my $line = <STDIN>)
{
    $i++;
    chomp $line;
    print STDERR "$i: $line\n";
    if (!Email::Valid->address($line) ||
        $line =~ /\s/)
    {
        print "$line INVALID ADDRESS FORMAT\n";
        next;
    }
    # Throttle: wait while the registry is full. Also wait while this same
    # address is still pending, because %threads is keyed by address and a
    # second entry would overwrite the first thread's handle, leaving that
    # thread unjoined and its result unreported.
    while (exists $threads{$line} ||
           scalar(keys(%threads)) >= $MAX_THREADS_COUNT)
    {
        sleep 1;
    }
    $thread = threads->new(\&checkaddress, $line);
    # The thread object must be shared before it can be stored in a shared hash.
    share($thread);
    $threads{$line} = $thread;
}
print STDERR "Waiting for running threads...\n";
# Wait until the consumer has joined and removed every registered thread.
# The comparison must be "> 0": a key count is never negative, so ">= 0"
# would loop forever and the script would never terminate.
while (scalar(keys(%threads)) > 0)
{
    print STDERR scalar(keys(%threads)) . " threads still running...\n";
    sleep 1;
}
print STDERR "Waiting for running threads...done\n";
# Tell the consumer loop to stop, then wait for it to exit.
$finished = 1;
print STDERR "Links read...done\n";
$t_consumer->join();
print STDERR "FINISHED\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment