vyraun · September 20, 2017 07:38
diff --git a/get_email.py b/get_email.py
 # Slightly Modified version of https://gist.github.com/dideler/5219706

 from optparse import OptionParser
 import os.path
 import re
 import os
 from sets import Set

 # Pattern for e-mail addresses, including the obfuscated "user at host dot tld"
 # spelling.  Group 1 is the whole address, group 2 the "@" / " at " separator
 # and group 3 the last "." / " dot " separator.  Only lower-case letters are
 # listed, so input has to be lower-cased before matching.
 # Raw literals keep sequences such as "\." and "\s" from being read as
 # (invalid) string escapes; the resulting pattern text is unchanged.
 regex = re.compile((r"([a-z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+\/=?^_`"
                     r"{|}~-]+)*(@|\sat\s)(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(\.|"
                     r"\sdot\s))+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)"))

 def file_to_str(filename):
     """Read the file at `filename` and return its full text in lower case.

     The text is lower-cased so that addresses written with capital letters
     are not missed by the (lower-case only) regular expression.
     """
     with open(filename) as handle:
         contents = handle.read()
     return contents.lower()

 def get_emails(s):
     """Return an iterator over the e-mail addresses found in string `s`.

     Each item is the first group (the complete address) of a `regex` match.
     """
     whole_matches = (groups[0] for groups in regex.findall(s))
     # Matches beginning with '//' are dropped: for a URL such as
     # 'http://user@example.com' the pattern would otherwise report
     # '//user@example.com' as an address.
     return (address for address in whole_matches if address[:2] != '//')

 if __name__ == '__main__':
     # Walk the current working directory recursively, print every e-mail
     # address found in the files below it, and write the distinct addresses
     # to 'unique_emails.txt' in that same directory, one per line.
     start_dir = os.getcwd()
     output_path = os.path.join(start_dir, 'unique_emails.txt')
     unique_emails = set()
     for root, dirs, files in os.walk(start_dir):
         for arg in files:
             # Never scan this script itself.
             if arg == "get_email.py":
                 continue
             # os.walk yields bare names; join with `root` so that files in
             # sub-directories are actually found and opened.
             path = os.path.join(root, arg)
             # Skip the results of a previous run, otherwise stale addresses
             # would be picked up again and never disappear.
             if path == output_path:
                 continue
             if os.path.isfile(path):
                 for email in get_emails(file_to_str(path)):
                     unique_emails.add(email)
                     print(email)
             else:
                 # Belongs to the `if` above (e.g. a broken symlink), not to
                 # the `for` loop.
                 print('"{}" is not a file to parse for emails!!'.format(path))

     # `with` closes the file even if a write fails; sorting makes the output
     # order reproducible between runs.
     with open(output_path, 'w') as f:
         for item in sorted(unique_emails):
             f.write("%s\n" % item)
	# Slightly Modified version of https://gist.github.com/dideler/5219706

	from optparse import OptionParser
	import os.path
	import re
	import os
	from sets import Set

	# Pattern for e-mail addresses, including the obfuscated "user at host dot
	# tld" spelling.  Group 1 is the whole address, group 2 the "@" / " at "
	# separator and group 3 the last "." / " dot " separator.
	# The "*" quantifiers and the unescaped "|" alternations are required: with
	# "\|" the pattern would demand a literal "|" character and ordinary
	# addresses would never match.  Raw literals keep "\." and "\s" from being
	# treated as string escapes.
	regex = re.compile((r"([a-z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+\/=?^_`"
	                    r"{|}~-]+)*(@|\sat\s)(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(\.|"
	                    r"\sdot\s))+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)"))

	def file_to_str(filename):
	    """Returns the contents of filename as a lower-cased string.

	    Case is lowered to prevent regex mismatches on addresses that contain
	    capital letters.
	    """
	    with open(filename) as f:
	        return f.read().lower()

	def get_emails(s):
	    """Returns an iterator of matched emails found in string s.

	    Each item is the first group (the complete address) of a `regex` match.
	    """
	    # Matches that start with '//' are dropped because the regular
	    # expression mistakenly reports a URL such as 'http://user@example.com'
	    # as the address '//user@example.com'.
	    return (email[0] for email in re.findall(regex, s)
	            if not email[0].startswith('//'))

	if __name__ == '__main__':
	    # Walk the current working directory recursively, print every e-mail
	    # address found in the files below it, and write the distinct addresses
	    # to 'unique_emails.txt' in that same directory, one per line.
	    start_dir = os.getcwd()
	    output_path = os.path.join(start_dir, 'unique_emails.txt')
	    unique_emails = set()
	    for root, dirs, files in os.walk(start_dir):
	        for arg in files:
	            # Never scan this script itself.
	            if arg == "get_email.py":
	                continue
	            # os.walk yields bare names; join with `root` so that files in
	            # sub-directories are actually found and opened.
	            path = os.path.join(root, arg)
	            # Skip the results of a previous run, otherwise stale addresses
	            # would be picked up again and never disappear.
	            if path == output_path:
	                continue
	            if os.path.isfile(path):
	                for email in get_emails(file_to_str(path)):
	                    unique_emails.add(email)
	                    print(email)
	            else:
	                # Belongs to the `if` above (e.g. a broken symlink), not to
	                # the `for` loop.
	                print('"{}" is not a file to parse for emails!!'.format(path))

	    # `with` closes the file even if a write fails; sorting makes the
	    # output order reproducible between runs.
	    with open(output_path, 'w') as f:
	        for item in sorted(unique_emails):
	            f.write("%s\n" % item)