Skip to content

Instantly share code, notes, and snippets.

@jQwotos
Last active November 25, 2017 18:59
Show Gist options
  • Save jQwotos/4b8d8f3a7b030c0814c3de74031fb701 to your computer and use it in GitHub Desktop.
Save jQwotos/4b8d8f3a7b030c0814c3de74031fb701 to your computer and use it in GitHub Desktop.
Extracts email addresses from CUHacking Registration emails.
# Usage Instructions
# 1. Download the emails from Zoho
# 2. Extract them into the proper folder name, default is Emails
# 3. Run the script (in python3) and done!
from os import chdir
from glob import glob
import re
import csv
# Default folder that retrieve_emails() scans for *.eml files.
EMAILS_FOLDER = 'Emails'
# An email is kept only if it contains this exact text (see _validate_email).
# NOTE(review): the trailing '=' is presumably a quoted-printable soft line
# break in the raw .eml text -- confirm against a sample message.
TARGET_TEXT = 'Someone just submitted your form on cuhacking.com/. Here\'s what they had to='
# An email containing this text is rejected even when TARGET_TEXT is present.
AVOID_TEXT = 'description:'
# Default output path used by save_to_csv().
CSV_FILE = 'emails.csv'
# Awesome email regex pat from user2032663 on https://stackoverflow.com/questions/16053797/regex-to-find-email-address-from-a-string
# The pattern ends with a literal '</pre>', so it only matches an address that
# is immediately followed by that closing tag; _find_user_email_addr() strips
# the tag from the matched text afterwards.
EMAIL_PAT = r'(?:[a-z0-9!#$%&\'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])</pre>'
def _read_file(fname):
    """Return the full text content of the file at *fname*.

    The file is opened in text mode with the platform default encoding.

    Args:
        fname: path of the file to read.

    Returns:
        The file content as a single string.
    """
    # The context manager closes the handle even if read() raises
    # (e.g. on a decoding error), which the manual open/close did not.
    with open(fname, 'r') as f:
        return f.read()
def retrieve_emails(EMAILS_FOLDER=EMAILS_FOLDER):
    """Read every ``*.eml`` file directly inside *EMAILS_FOLDER*.

    The working directory is switched to the folder while the files are
    read and is always switched back to where it was before the call.

    Args:
        EMAILS_FOLDER: path of the folder holding the exported emails.

    Returns:
        A list with the text content of each ``*.eml`` file found.

    Raises:
        OSError: if the folder cannot be entered (e.g. it does not exist).
    """
    # Imported locally because the file only imports ``chdir`` from ``os``.
    from os import getcwd

    # Remember the real starting point: a plain chdir('..') lands in the
    # wrong place when the folder is nested, absolute, or a symlink.
    original_dir = getcwd()
    chdir(EMAILS_FOLDER)
    try:
        return [_read_file(name) for name in glob("*.eml")]
    finally:
        # Restore the caller's directory even when a read fails.
        chdir(original_dir)
def _validate_email(email):
    """Tell whether *email* is a form-submission message worth keeping.

    A message qualifies when it contains ``TARGET_TEXT`` and does not
    contain ``AVOID_TEXT``.

    Args:
        email: raw text of one email.

    Returns:
        ``True`` if the message qualifies, ``False`` otherwise.
    """
    if TARGET_TEXT not in email:
        return False
    return AVOID_TEXT not in email
def filter_emails(emails):
    """Keep only the messages accepted by ``_validate_email``.

    Args:
        emails: iterable of raw email texts.

    Returns:
        A new list with the accepted messages, in their original order.
    """
    return list(filter(_validate_email, emails))
def _find_user_email_addr(email):
    """Extract the submitter's address from the raw text of one email.

    ``EMAIL_PAT`` only matches an address that is immediately followed by a
    closing ``</pre>`` tag; the tag is removed from the returned value.

    Args:
        email: raw text of one email.

    Returns:
        The first matching address, without the trailing tag.

    Raises:
        ValueError: if the text holds no address followed by ``</pre>``.
    """
    closing_tag = '</pre>'
    # EMAIL_PAT spells out lower-case ranges only. Without IGNORECASE an
    # address such as "John.Doe@x.com" is silently cut down to "oe@x.com".
    match = re.search(EMAIL_PAT, email, re.IGNORECASE)
    if match is None:
        # Previously surfaced as an opaque AttributeError on None.group().
        raise ValueError('no email address followed by </pre> found in message')
    # The match always ends with the tag (in any letter case), so drop it by
    # length rather than by exact text.
    return match.group()[:-len(closing_tag)]
def remove_duplicates(emails):
    """Return the distinct items of *emails*, keeping first-seen order.

    Args:
        emails: iterable of hashable items (address strings).

    Returns:
        A list holding each distinct item once, ordered by first appearance.
    """
    # dict.fromkeys keeps insertion order, so the result is the same on every
    # run; list(set(...)) ordering changes with per-process hash randomization.
    return list(dict.fromkeys(emails))
def save_to_csv(email_addrs, CSV_FILE=CSV_FILE):
    """Write the addresses to a one-column CSV file with an ``Email`` header.

    Any existing file at *CSV_FILE* is overwritten.

    Args:
        email_addrs: iterable of address strings, one per output row.
        CSV_FILE: path of the CSV file to write.
    """
    # newline='' leaves line endings to the csv module, as its documentation
    # requires; without it Windows output gets "\r\r\n" (blank rows).
    # The context manager closes the file even if a write fails.
    with open(CSV_FILE, 'w', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',')
        csv_writer.writerow(['Email'])
        csv_writer.writerows([email_addr] for email_addr in email_addrs)
def main():
    """Run the whole pipeline: read, filter, extract, de-duplicate, save.

    Uses the default folder and output file of the helper functions.
    """
    matching_messages = filter_emails(retrieve_emails())
    found_addrs = list(map(_find_user_email_addr, matching_messages))
    save_to_csv(remove_duplicates(found_addrs))


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment