Skip to content

Instantly share code, notes, and snippets.

@abought
Last active September 15, 2024 20:50
Show Gist options
  • Save abought/15a1e08705b121c1b7bd to your computer and use it in GitHub Desktop.
Save abought/15a1e08705b121c1b7bd to your computer and use it in GitHub Desktop.
Extract all email addresses in from/to/cc fields of every msg in one Gmail folder
"""Create a connection to Gmail and do something with the results
References:
http://www.voidynullness.net/blog/2013/07/25/gmail-email-with-python-via-imap/
and
https://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/
"""
__author__ = 'abought'
import email
import imaplib
import getpass
import sys
import re
from pprint import pprint as pp
# User may want to change these parameters if running script as-is
# Gmail's system folders live under the "[Gmail]/" prefix; the slash is part
# of the folder name and must be present for the folder to be found.
SEARCH_FOLDER = '[Gmail]/Trash'  # TODO: A user will want to change this
# Other folders: "INBOX", "[Gmail]/All Mail"
DEFAULT_MAIL_SERVER = 'imap.gmail.com'
# No user parameters below this line
# Non-greedy capture of whatever sits between angle brackets in a header value.
ADDR_PATTERN = re.compile(r'<(.*?)>')  # Finds email as <user@example.com>
def connect(user, pwd, server=DEFAULT_MAIL_SERVER):
    """Connect to [the specified] mail server. Return an open connection.

    :param user: Account name to log in with (for Gmail, the full address).
    :param pwd: Password for that account.
    :param server: Host name of the IMAP server; an SSL connection is used.
    :return: The logged-in ``imaplib.IMAP4_SSL`` connection.
    :raises SystemExit: With exit status 1 when the server rejects the login.
    """
    conn = imaplib.IMAP4_SSL(server)
    try:
        conn.login(user, pwd)
    except imaplib.IMAP4.error as err:
        # Report on stderr (with the server's reason) and release the socket
        # before bailing out, instead of leaving the connection dangling.
        sys.stderr.write("Failed to login: %s\n" % (err,))
        conn.shutdown()
        sys.exit(1)
    return conn
def print_folders(conn):
    """Print the mailbox folders known to the server, one per line.

    ``conn.list()`` returns a ``(status, entries)`` pair, so the pair is
    unpacked first and only the entries are printed (iterating the pair
    itself would print the status string followed by the whole list).

    :param conn: Logged-in IMAP connection.
    """
    rv, folders = conn.list()
    if rv != 'OK':
        print("Could not list folders")
        return
    for entry in folders:
        if entry is None:  # the server may report "no results" as [None]
            continue
        # Python 3 yields bytes; decode so the output has no b'...' wrapper.
        if isinstance(entry, bytes) and not isinstance(entry, str):
            entry = entry.decode('utf-8', 'replace')
        print("\t%s" % (entry,))
def _quote_mailbox(name):
    """Return *name* wrapped in IMAP double quotes when it contains whitespace."""
    if len(name) >= 2 and name[0] == '"' and name[-1] == '"':
        return name  # caller already quoted it
    if not any(ch.isspace() for ch in name):
        return name  # plain atom; send unchanged
    escaped = name.replace('\\', '\\\\').replace('"', '\\"')
    return '"%s"' % escaped


def get_folder(conn, folder_name):
    """Fetch a specific folder (or label) from server.

    :param conn: Logged-in IMAP connection.
    :param folder_name: Mailbox name, e.g. ``"INBOX"`` or
        ``"[Gmail]/All Mail"``. Names containing spaces are quoted here
        because Python 3's imaplib sends arguments as given.
    :return: The same connection. If the folder could not be opened, the
        known folders are printed and the connection is returned with no
        folder selected.
    """
    if conn.state == "SELECTED":
        # Explicitly close any previously opened folders; may not be necessary
        conn.close()
    rv, data = conn.select(_quote_mailbox(folder_name))
    if rv != 'OK':
        print("Could not open specified folder. Known labels:")
        print_folders(conn)
    return conn
def get_email_ids(conn, query='ALL'):
    """Get the UIDs of all emails in the selected folder that match *query*.

    :param conn: IMAP connection with a folder already selected.
    :param query: IMAP search criteria; the default matches every message.
    :return: List of UID tokens as returned by the server (empty when the
        search fails or matches nothing).
    :raises imaplib.IMAP4.error: If no folder is currently selected.
    """
    if conn.state != "SELECTED":
        raise imaplib.IMAP4.error("Cannot search without selecting a folder")
    rv, data = conn.uid('search', None, query)
    if rv != 'OK':
        print("Could not fetch email ids")  # for some reason...
        return []
    # Guard against an empty or None payload before splitting it.
    if not data or data[0] is None:
        return []
    return data[0].split()
def fetch_message(conn, msg_uid):
    """
    Fetch a specific message uid (not sequential id!) from the given folder;
    return the parsed message.

    :param conn: IMAP connection with a folder already selected.
    :param msg_uid: UID of the message to fetch.
    :return: Parsed ``email.message.Message`` (dict-like). An empty dict is
        returned, after printing an error, when the fetch fails or the
        server sends back no message for that UID.
    """
    # TODO: Could we fetch just the envelope of the response to save bandwidth?
    rv, data = conn.uid('fetch', msg_uid, "(RFC822)")

    # The message arrives as a (header, payload) tuple somewhere in `data`.
    # An OK reply with no such tuple (e.g. [None] for an unknown UID) is
    # treated the same as a failed fetch rather than indexing blindly.
    payload = None
    if rv == 'OK':
        for part in data or []:
            if isinstance(part, tuple) and len(part) > 1:
                payload = part[1]
                break

    if payload is None:
        uid_text = msg_uid
        if isinstance(uid_text, bytes) and not isinstance(uid_text, str):
            uid_text = uid_text.decode('ascii', 'replace')
        print("ERROR fetching message # %s" % (uid_text,))
        return {}

    # Python 3 hands back bytes, which message_from_string cannot parse.
    if isinstance(payload, bytes) and hasattr(email, 'message_from_bytes'):
        return email.message_from_bytes(payload)
    return email.message_from_string(payload)  # dict-like object
def get_recipients(msg_parsed):
    """Given a parsed message, extract and return the list of addresses.

    The From, To, Cc and Bcc headers are read in that order. Addresses are
    parsed with ``email.utils.getaddresses`` so that bare addresses
    (``bob@example.com``) are found as well as bracketed ones
    (``Bob <bob@example.com>``).

    :param msg_parsed: Parsed message, or any dict-like object mapping header
        names to header strings (such as the empty dict returned for a
        failed fetch).
    :return: List of address strings; duplicates are kept.
    """
    # Imported here so the function does not rely on `import email` having
    # loaded the `email.utils` submodule.
    from email.utils import getaddresses

    recipients = []
    get_all = getattr(msg_parsed, 'get_all', None)
    for field in ('From', 'To', 'Cc', 'Bcc'):
        if get_all is not None:
            # A header may legally appear more than once; collect every copy.
            values = get_all(field, [])
        else:
            single = msg_parsed.get(field, "")  # plain dict fallback
            values = [single] if single else []
        pairs = getaddresses([str(value) for value in values])
        # Drop empty results produced for unparseable fragments.
        recipients.extend(addr for _name, addr in pairs if addr)
    return recipients
def main():
    """Prompt for credentials, scan SEARCH_FOLDER, and print every address found.

    The full list of addresses is printed first, then the set of unique ones.
    The connection is always logged out, even if fetching fails part-way.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    username = read_line("Full email address: ")
    password = getpass.getpass()

    # Connect
    mail_conn = connect(username, password)
    try:
        # Open a specific folder and get list of email message uids
        mail_conn = get_folder(mail_conn, SEARCH_FOLDER)
        if mail_conn.state != "SELECTED":
            # get_folder has already reported the problem; stop cleanly
            # instead of failing with a traceback in the search step.
            sys.exit(1)
        msg_uid_list = get_email_ids(mail_conn)

        # Fetch a list of recipients
        all_recipients = []
        for msg_uid in msg_uid_list:
            msg = fetch_message(mail_conn, msg_uid)
            all_recipients.extend(get_recipients(msg))

        # Very unsophisticated way of showing the recipient list
        print("List of all recipients:")
        print("------------")
        pp(all_recipients)
        print("\n\n List of all UNIQUE recipients:")
        print("-------------------------------")
        pp(set(all_recipients))
    finally:
        try:
            # CLOSE is only legal while a folder is selected
            if mail_conn.state == "SELECTED":
                mail_conn.close()
        finally:
            mail_conn.logout()


if __name__ == "__main__":
    main()
@madeinlisboa
Copy link

Thanks! That's what I was looking for. Is there a way of limiting by date?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment