Skip to content

Instantly share code, notes, and snippets.

@vyraun
Created September 20, 2017 07:38
Show Gist options
  • Save vyraun/e84c67899fb3aca50bab38017e93dc10 to your computer and use it in GitHub Desktop.
Save vyraun/e84c67899fb3aca50bab38017e93dc10 to your computer and use it in GitHub Desktop.
Drop this file into a log directory and run it: it extracts every email address found in the files there and writes the unique ones to unique_emails.txt.
# Slightly Modified version of https://gist.github.com/dideler/5219706
from optparse import OptionParser
import os.path
import re
import os
from sets import Set
# Compiled pattern for e-mail addresses, including obfuscated spellings that
# use " at " in place of "@" and " dot " in place of ".".
#   group 1 - the whole matched address
#   group 2 - the separator between local part and domain ("@" or " at ")
#   group 3 - the last domain separator matched ("." or " dot ")
# Only lowercase letters are matched (no IGNORECASE flag), so input must be
# lower-cased before matching.
# Raw string literals keep the regex escapes (\/, \., \s) from being treated
# as (invalid) string escapes; the pattern text itself is unchanged.
regex = re.compile(
    r"([a-z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+\/=?^_`"
    r"{|}~-]+)*(@|\sat\s)(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(\.|"
    r"\sdot\s))+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)"
)
def file_to_str(filename):
    """Return the lower-cased text content of filename.

    The file is decoded as UTF-8 and undecodable bytes are replaced rather
    than raising, so binary or oddly encoded files found while scanning a
    directory do not abort the run.

    Args:
        filename: Path of the file to read.

    Returns:
        The file's content as a single lower-cased string.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    # io.open behaves the same on Python 2.6+ and Python 3 and accepts the
    # encoding/errors arguments that the Python 2 built-in open() lacks.
    import io

    with io.open(filename, encoding="utf-8", errors="replace") as f:
        # Case is lowered because the e-mail pattern only matches lowercase.
        return f.read().lower()
def get_emails(s):
    """Return an iterator over the e-mail addresses matched in string s.

    Matches that begin with '//' are dropped: the pattern allows '/' in the
    local part, so a URL with embedded credentials (scheme://user@host) would
    otherwise be reported as an address starting with '//'.
    """
    # Each findall() entry is a tuple of the pattern's three groups; the
    # first group is the complete address.
    hits = re.findall(regex, s)
    return (
        address
        for address, _separator, _dot in hits
        if not address.startswith('//')
    )
if __name__ == '__main__':
    # Scan every file under the current working directory for e-mail
    # addresses, echo each match, and save the distinct addresses to
    # unique_emails.txt in that directory.
    scan_root = os.getcwd()
    output_path = os.path.join(scan_root, 'unique_emails.txt')
    # Never parse this script itself or the report left by a previous run.
    skipped_paths = set([os.path.abspath(__file__), output_path])
    # Built-in set; the sets module is deprecated.
    unique_emails = set()
    for root, _dirs, files in os.walk(scan_root):
        for arg in files:
            # os.walk yields bare file names; join them with the directory
            # being visited so files in subdirectories resolve correctly.
            path = os.path.join(root, arg)
            parseable = (
                os.path.isfile(path)
                and arg != "get_email.py"
                and path not in skipped_paths
            )
            if parseable:
                for email in get_emails(file_to_str(path)):
                    unique_emails.add(email)
                    print(email)
            else:
                print('"{}" is not a file to parse for emails!!'.format(path))
    # The with-block closes the report even if a write fails; sorting makes
    # the output deterministic between runs.
    with open(output_path, 'w') as f:
        for item in sorted(unique_emails):
            f.write("%s\n" % item)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment