Last active
April 15, 2018 18:44
-
-
Save mgermain/03f0f4689da2eeedc098e322714e477c to your computer and use it in GitHub Desktop.
Validate that email addresses are accepted by their domain's SMTP server.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Dependencies python3 and dnspython | |
import os | |
import re | |
import sys | |
import csv | |
import time | |
import string | |
import random | |
import smtplib | |
import argparse | |
import dns.resolver | |
from dns.resolver import NXDOMAIN | |
from socket import gaierror | |
def valid_email_syntax(addressToVerify):
    """Return True when *addressToVerify* looks like ``local@domain.tld``.

    This is a deliberately simple syntax screen, not an RFC 5322 validator:
    the local part may contain letters, digits and ``. + _ -``; the domain
    may contain letters, digits and ``. _ -`` and must end in a dot followed
    by at least one ASCII letter.

    The whole string must match, so surrounding whitespace or a trailing
    newline makes the address invalid (callers are expected to strip first).
    """
    # Raw string avoids invalid-escape warnings; the final ``+`` (rather than
    # ``*``) rejects addresses with an empty top-level domain ("user@host.").
    pattern = r"[A-Za-z0-9.+_-]+@[A-Za-z0-9._-]+\.[a-zA-Z]+"
    return re.fullmatch(pattern, addressToVerify) is not None
def _close_smtp(server):
    """Politely end an SMTP session, tolerating a missing or dead connection."""
    try:
        server.quit()
    except (smtplib.SMTPException, OSError):
        # Never connected, or the peer already hung up: just drop the socket.
        server.close()


def verify_mailbox(addressToVerify):
    """Ask the recipient domain's mail exchangers whether they accept the address.

    A fake message envelope is started (random ``@gmail.com`` sender, then
    ``RCPT TO`` the address under test) against each MX host in sorted order
    until one of them answers the ``RCPT`` command. No message is sent.

    Returns:
        -2    the domain is in the hard-coded catch-all list (not probed).
        -1    the address has no domain part, or the domain does not exist
              (NXDOMAIN).
        -666  any other failure during the MX lookup (the error is printed).
        0     no MX host produced any SMTP reply.
        else  the last SMTP reply code received: the ``RCPT`` reply when one
              was obtained, otherwise the failing connect/``MAIL`` reply of
              the last host tried.

    Raises:
        OSError: for socket-level failures other than the ones treated as
            "try the next MX" below.
    """
    import errno  # local import: only needed for the portable ENETUNREACH constant

    # Random throw-away sender for the fake envelope.
    name = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    fromAddress = '{}@gmail.com'.format(name)
    catchall_servers = ('google.com', 'microsoft.com', 'adobe.com', 'humanlongevity.com')

    # Get domain for DNS lookup; an address without '@' has no domain at all.
    _, at_sign, domain = addressToVerify.rpartition('@')
    if not at_sign or not domain:
        return -1
    if domain.lower() in catchall_servers:
        return -2

    # MX record lookup
    try:
        answers = dns.resolver.query(domain, 'MX')
    except NXDOMAIN:
        return -1
    except Exception as e:
        print(e)
        return -666

    code = 0
    for rdata in sorted(answers):
        server = smtplib.SMTP()
        try:
            code, _ = server.connect(rdata.exchange.to_text())
            if code != 220:
                # Could not connect
                continue
            server.ehlo_or_helo_if_needed()
            code, _ = server.mail(fromAddress)
            if code != 250:
                # Could not create fake mail
                continue
            code, _ = server.rcpt(addressToVerify)
            break
        except (gaierror, ConnectionRefusedError, TimeoutError):
            # Can't connect to MX, try the next one
            continue
        except smtplib.SMTPServerDisconnected:
            # Must be listed before OSError: smtplib exceptions subclass
            # OSError, so the generic handler would otherwise re-raise this.
            time.sleep(1)
            continue
        except OSError as e:
            if e.errno == errno.ENETUNREACH:
                # Can't find network, wait and try the next MX
                time.sleep(1)
                continue
            raise
        finally:
            # Runs on every exit path (break, continue, raise) so no
            # connection is leaked and an unconnected server never crashes us.
            _close_smtp(server)
    return code
def get_args():
    """Parse the command line and return the resulting namespace.

    The single positional argument ``emails`` is returned as given on the
    command line; interpreting it (list, file or .tsv) is left to the caller.
    """
    description = (
        "Validate either an email list (coma separated no space), "
        "a file of emails (one per line) or a CMT3 invite file (.tsv)."
    )
    cli = argparse.ArgumentParser(description=description)
    cli.add_argument('emails')
    return cli.parse_args()
def _load_emails(source):
    """Return the raw address strings described by *source*, or None.

    *source* is either a comma-separated list of addresses (recognised by
    containing '@'), a path to a ``.tsv`` file whose fourth column is the
    address, or a path to a text file with one address per line. None is
    returned when *source* is neither a list nor an existing file.
    """
    if '@' in source:
        return source.split(',')
    if not os.path.isfile(source):
        return None
    with open(source, 'r') as f:
        if source.endswith('.tsv'):
            # Rows are: name, middle name, last name, email, institution.
            # Blank or short rows are skipped instead of aborting the run.
            reader = csv.reader(f, dialect='excel-tab')
            return [row[3] for row in reader if len(row) > 3]
        return f.readlines()


def _format_result(code, email):
    """Translate a verify_mailbox() result code into the report line."""
    if code == 250:
        return "OK - {}".format(email)
    if code == 550:
        return "INVALID - {}".format(email)
    if code == -1:
        return "INVALID (domain) - {}".format(email)
    if code in (450, 451):
        return "Unknown (greylisted) - {}".format(email)
    if code == -2:
        return "Unknown (catchall) - {}".format(email)
    return "Unknown ({}) - {}".format(code, email)


def main():
    """Load the addresses named on the command line and report on each one."""
    args = get_args()

    # Load emails
    emails = _load_emails(args.emails)
    if emails is None:
        print("File ({}) does not exists.".format(args.emails))
        sys.exit(1)

    # Validate emails
    for raw in emails:
        email = raw.lower().strip()
        if not email:
            # Blank entries (empty lines, stray commas) are ignored.
            continue
        if not valid_email_syntax(email):
            print("INVALID (syntax) - {}".format(email))
            continue
        print(_format_result(verify_mailbox(email), email))
        # Random pause between probes so mail servers are not hammered.
        time.sleep(random.uniform(0, 5))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment