Skip to content

Instantly share code, notes, and snippets.

@jmb
Created July 14, 2018 21:58
Show Gist options
  • Save jmb/424e8e113f2a546349ff60d07f4eab3a to your computer and use it in GitHub Desktop.
Save jmb/424e8e113f2a546349ff60d07f4eab3a to your computer and use it in GitHub Desktop.
Script to extract all email addresses from all emails in all folders of an IMAP account.
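"""Connect to an IMAP server and collect all email addresses.

Scans the From, To, Cc and Bcc headers of every message in every folder of
the account and appends the addresses found to FILENAME.
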
Original script by abought: https://gist.github.com/abought/15a1e08705b121c1b7bd
References:
http://www.voidynullness.net/blog/2013/07/25/gmail-email-with-python-via-imap/
and
https://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/
"""
__author__ = 'jmb'
import email
import imaplib
import getpass
import sys
import re
import os
# EDIT these as required:
FILENAME = 'out.txt'
DEFAULT_MAIL_SERVER = 'imap.server'
# No user parameters below this line

# Finds every email address that appears anywhere inside a header value,
# whether it is bare ("user@example.com"), wrapped in angle brackets
# ("Name <user@example.com>") or part of a comma-separated list.
# The pattern is deliberately NOT anchored with ^...$: an anchored pattern only
# matches when the whole header consists of a single bare address.
# The domain is written as dot-separated labels so that a trailing "." after
# an address (end of a sentence) is not swallowed into the match.
# (An earlier variant, '<(.*?)>', only found addresses written as
# <user@example.com>.)
ADDR_PATTERN = re.compile(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+")
def connect(user, pwd, server=DEFAULT_MAIL_SERVER):
    """Open an SSL connection to the mail server and log in.

    Args:
        user: Login name passed to ``IMAP4.login``.
        pwd: Password passed to ``IMAP4.login``.
        server: Host name of the IMAP server.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` connection.

    Exits the process with status 1 if the server rejects the login.
    """
    conn = imaplib.IMAP4_SSL(server)
    try:
        conn.login(user, pwd)
    except imaplib.IMAP4.error:
        # Single-argument call form prints identically on Python 2 and 3.
        print("Failed to login")
        sys.exit(1)
    return conn
def print_folders(conn):
    """Print one line per mailbox folder reported by the server.

    Args:
        conn: A logged-in imaplib connection.
    """
    # conn.list() returns (status, entries); iterate over the entries only,
    # not over the (status, entries) pair itself.
    rv, entries = conn.list()
    if rv != 'OK':
        print("Could not list folders")
        return
    for entry in entries or []:
        if entry is None:
            # imaplib reports "no data" as a single None entry.
            continue
        if isinstance(entry, bytes) and not isinstance(entry, str):
            # Python 3 hands back bytes; decode so the line reads cleanly.
            entry = entry.decode('utf-8', 'replace')
        print("  %s" % (entry,))
def get_folder(conn, folder_name):
    """Select a specific folder (or label) on the server.

    Args:
        conn: A logged-in imaplib connection.
        folder_name: Mailbox name, either bare or already enclosed in
            double quotes.

    Returns:
        The same connection object. If the folder could not be opened, a
        message and the list of known folders are printed and the connection
        is returned without a selected folder.
    """
    if conn.state == "SELECTED":
        # Explicitly close any previously opened folder; may not be necessary.
        conn.close()

    mailbox = folder_name
    if isinstance(mailbox, str):
        quoted = len(mailbox) >= 2 and mailbox.startswith('"') and mailbox.endswith('"')
        if not quoted:
            # Python 3's imaplib sends arguments verbatim, so a name with
            # spaces must be sent as an IMAP quoted string; backslash and
            # double quote are the two characters that need escaping inside.
            escaped = mailbox.replace('\\', '\\\\').replace('"', '\\"')
            mailbox = '"%s"' % escaped

    rv, _ = conn.select(mailbox)
    if rv != 'OK':
        print("Could not open specified folder. Known labels:")
        print_folders(conn)
    return conn
def get_email_ids(conn, query='ALL'):
    """Return the UIDs of all messages in the selected folder matching *query*.

    Args:
        conn: An imaplib connection with a folder selected.
        query: IMAP search criteria; defaults to every message.

    Returns:
        A list of UIDs as returned by the server (empty if the search failed
        or matched nothing).

    Raises:
        imaplib.IMAP4.error: If no folder is currently selected.
    """
    if conn.state != "SELECTED":
        raise imaplib.IMAP4.error("Cannot search without selecting a folder")

    rv, data = conn.uid('search', None, query)
    if rv != 'OK':
        print("Could not fetch email ids")  # for some reason...
        return []
    if not data or data[0] is None:
        # imaplib reports "OK but no SEARCH data" as [None]; treat as no hits.
        return []
    return data[0].split()
def fetch_message(conn, msg_uid):
    """Fetch one message by UID (not sequence number) and parse it.

    Args:
        conn: An imaplib connection with a folder selected.
        msg_uid: UID of the message within the selected folder.

    Returns:
        The parsed message (a dict-like ``email.message.Message``), or an
        empty dict if the server reported an error or returned no message
        data for that UID.
    """
    # BODY.PEEK[] returns the same full message as RFC822 but does not set
    # the \Seen flag, so scanning a mailbox does not mark everything as read.
    # TODO: Could we fetch just the headers to save bandwidth?
    rv, data = conn.uid('fetch', msg_uid, "(BODY.PEEK[])")
    if rv != 'OK':
        print("ERROR fetching message # %s" % (msg_uid,))
        return {}

    # A UID that does not exist yields OK with [None] rather than a
    # (header, payload) tuple.
    if not data or not isinstance(data[0], tuple) or len(data[0]) < 2:
        print("ERROR fetching message # %s" % (msg_uid,))
        return {}

    raw = data[0][1]
    if isinstance(raw, str):
        # Python 2: the payload is already a native str.
        return email.message_from_string(raw)
    # Python 3: the payload is bytes and must be parsed as such.
    return email.message_from_bytes(raw)
def get_recipients(msg_parsed):
    """Extract the addresses found in a parsed message's address headers.

    Args:
        msg_parsed: A parsed message, or any object with a dict-style
            ``get(name, default)`` method.

    Returns:
        A list of every ADDR_PATTERN match found in the From, To, Cc and Bcc
        headers, in header order; duplicates are kept.
    """
    addr_fields = ('From', 'To', 'Cc', 'Bcc')
    recipients = []
    for field in addr_fields:
        value = msg_parsed.get(field, "")  # Empty string if field not present
        # On Python 3 a header with undecodable bytes comes back as an
        # email.header.Header object rather than str; coerce before matching.
        recipients.extend(re.findall(ADDR_PATTERN, str(value)))
    return recipients
# One line of an IMAP LIST response: (flags) "delimiter" mailbox-name
LIST_ENTRY_PATTERN = re.compile(
    r'\((?P<flags>[^)]*)\)\s+(?:"(?:[^"\\]|\\.)*"|NIL)\s+(?P<name>.+)$'
)


def parse_folder_name(list_entry):
    """Extract the mailbox name from one entry of ``IMAP4.list()`` data.

    Args:
        list_entry: One LIST response line, as bytes or str.

    Returns:
        The mailbox name exactly as the server sent it (surrounding double
        quotes retained, so it can be passed straight back to SELECT), or
        None if the entry cannot be parsed or is flagged \\Noselect.
    """
    if isinstance(list_entry, bytes) and not isinstance(list_entry, str):
        list_entry = list_entry.decode('utf-8', 'replace')
    if not isinstance(list_entry, str):
        # e.g. None, or a tuple for a name sent as an IMAP literal.
        return None
    match = LIST_ENTRY_PATTERN.match(list_entry)
    if match is None:
        return None
    if '\\noselect' in match.group('flags').lower():
        # Pure container folders cannot be opened.
        return None
    return match.group('name').strip()


if __name__ == "__main__":
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    username = read_line("Full email address: ")
    password = getpass.getpass()

    # Connect
    mail_conn = connect(username, password)
    try:
        rv, folder_entries = mail_conn.list()
        if rv != 'OK':
            print("Could not list folders")
            folder_entries = []

        # Open output file; the with-block closes it even if a folder fails.
        with open(FILENAME, "a") as out_file:
            # Go through each folder
            for entry in folder_entries:
                mailbox = parse_folder_name(entry)
                if mailbox is None:
                    continue
                folder = mailbox.strip('"')
                if folder == ".":
                    continue

                mail_conn = get_folder(mail_conn, mailbox)
                if mail_conn.state != "SELECTED":
                    # Folder could not be opened; get_folder already said so.
                    continue
                msg_uid_list = get_email_ids(mail_conn)
                print("Scanning folder:  %s  with  %d  messages"
                      % (folder, len(msg_uid_list)))

                # Fetch a list of recipients
                all_recipients = []
                for msg_uid in msg_uid_list:
                    msg = fetch_message(mail_conn, msg_uid)
                    all_recipients.extend(get_recipients(msg))

                # De-duplicate within this folder; sorted for stable output.
                unique_addresses = sorted(set(all_recipients))
                print("Writing %d email addresses to file %s"
                      % (len(unique_addresses), out_file.name))
                out_file.write("".join(addr + "\n" for addr in unique_addresses))
                # Push this folder's results to disk before the next folder
                # so a later failure does not lose them.
                out_file.flush()
                os.fsync(out_file.fileno())

        print("\nWritten to file: " + FILENAME)
    finally:
        try:
            # CLOSE is only legal while a folder is selected.
            if mail_conn.state == "SELECTED":
                mail_conn.close()
        finally:
            mail_conn.logout()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment