Last active
September 15, 2024 20:50
-
-
Save abought/15a1e08705b121c1b7bd to your computer and use it in GitHub Desktop.
Extract all email addresses in from/to/cc fields of every msg in one Gmail folder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Connect to Gmail over IMAP and collect the email addresses found in the From/To/Cc/Bcc headers of every message in one folder.
References: | |
http://www.voidynullness.net/blog/2013/07/25/gmail-email-with-python-via-imap/ | |
and | |
https://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/ | |
""" | |
__author__ = 'abought' | |
import email | |
import imaplib | |
import getpass | |
import sys | |
import re | |
from pprint import pprint as pp | |
# User may want to change these parameters if running script as-is.
# Gmail system folders are addressed as "[Gmail]/<name>"; the slash is the
# hierarchy separator and must be present.
SEARCH_FOLDER = '[Gmail]/Trash'  # TODO: A user will want to change this
# Other folders: "INBOX", "[Gmail]/All Mail"
DEFAULT_MAIL_SERVER = 'imap.gmail.com'

# No user parameters below this line
# Captures the text between angle brackets, e.g. "user@example.com" from
# "Some Name <user@example.com>". Non-greedy so several addresses on one
# header line are matched individually.
ADDR_PATTERN = re.compile(r'<(.*?)>')
def connect(user, pwd, server=DEFAULT_MAIL_SERVER):
    """Open an SSL IMAP connection to ``server`` and log in.

    Args:
        user: Account name passed to the IMAP LOGIN command.
        pwd: Password passed to the IMAP LOGIN command.
        server: Host name of the IMAP server.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` connection.

    Exits the process with status 1 (after printing a message) when the
    server rejects the login.
    """
    conn = imaplib.IMAP4_SSL(server)
    try:
        conn.login(user, pwd)
    except imaplib.IMAP4.error:
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Failed to login")
        sys.exit(1)
    return conn
def print_folders(conn):
    """Print every mailbox folder known to the server, one per line.

    Args:
        conn: Object exposing an imaplib-style ``list()`` method that
            returns a ``(status, entries)`` pair.

    Nothing is printed when the status is not ``'OK'``.
    """
    # list() returns (status, [entry, ...]); iterate the entries, not the pair.
    rv, folders = conn.list()
    if rv != 'OK':
        return
    for entry in folders:
        if entry is None:  # imaplib reports an empty result as [None]
            continue
        if not isinstance(entry, str) and hasattr(entry, 'decode'):
            # Python 3 yields bytes; decode so the output has no b'...' wrapper.
            entry = entry.decode('utf-8', 'replace')
        print("\t %s" % (entry,))
def get_folder(conn, folder_name):
    """Select a folder (or Gmail label) on the server.

    Args:
        conn: Logged-in imaplib connection.
        folder_name: Mailbox name, e.g. ``"INBOX"`` or ``"[Gmail]/All Mail"``.

    Returns:
        The same connection object. When the server refuses the SELECT, a
        message and the list of known folders are printed and the
        connection is returned without a selected folder.
    """
    if conn.state == "SELECTED":
        # Explicitly close any previously opened folder; may not be necessary
        conn.close()

    mailbox = folder_name
    if ' ' in mailbox and not mailbox.startswith('"'):
        # A name containing a space must be sent as an IMAP quoted string,
        # otherwise the server sees two arguments. Already-quoted names are
        # left untouched.
        escaped = mailbox.replace('\\', '\\\\').replace('"', '\\"')
        mailbox = '"%s"' % escaped

    rv, _ = conn.select(mailbox)
    if rv != 'OK':
        print("Could not open specified folder. Known labels:")
        print_folders(conn)
    return conn
def get_email_ids(conn, query='ALL'):
    """Return the UIDs of the messages in the selected folder matching ``query``.

    Args:
        conn: imaplib connection with a folder already selected.
        query: IMAP SEARCH criterion.

    Returns:
        A list of UID tokens as returned by the server (whitespace-split);
        an empty list when the search fails or matches nothing.

    Raises:
        imaplib.IMAP4.error: If no folder is currently selected.
    """
    if conn.state != "SELECTED":
        raise imaplib.IMAP4.error("Cannot search without selecting a folder")

    rv, data = conn.uid('search', None, query)
    if rv != 'OK':
        print("Could not fetch email ids")  # for some reason...
        return []
    if not data or not data[0]:
        # No matches: the payload is empty (or None), nothing to split.
        return []
    return data[0].split()
def fetch_message(conn, msg_uid):
    """Fetch one message by UID (not sequence number) and parse it.

    Args:
        conn: imaplib connection with a folder already selected.
        msg_uid: UID of the message within the selected folder.

    Returns:
        The parsed message (a dict-like ``email.message.Message``), or an
        empty dict when the fetch fails or the server returns no payload
        for that UID.
    """
    # TODO: Could we fetch just the envelope of the response to save bandwidth?
    rv, data = conn.uid('fetch', msg_uid, "(RFC822)")
    if rv != 'OK' or not data or data[0] is None:
        uid_text = msg_uid
        if not isinstance(msg_uid, str) and isinstance(msg_uid, bytes):
            uid_text = msg_uid.decode('ascii', 'replace')
        print("ERROR fetching message # %s" % (uid_text,))
        return {}

    raw = data[0][1]
    if isinstance(raw, str):
        # Python 2 payloads (and any text payload) go through the str parser.
        return email.message_from_string(raw)
    # Python 3 imaplib hands back bytes, which message_from_string rejects.
    return email.message_from_bytes(raw)
def get_recipients(msg_parsed):
    """Extract every address from the From/To/Cc/Bcc headers of a message.

    Args:
        msg_parsed: Parsed message, or any dict-like object with ``get``
            (an empty dict yields an empty list).

    Returns:
        A list of bare addresses (no display names, no angle brackets), in
        header order; duplicates are kept.
    """
    # Local import: the file only imports the top-level ``email`` package.
    from email.utils import getaddresses

    recipients = []
    for field in ('From', 'To', 'Cc', 'Bcc'):
        get_all = getattr(msg_parsed, 'get_all', None)
        if get_all is not None:
            # A header may occur more than once; collect every occurrence.
            values = get_all(field, [])
        else:
            single = msg_parsed.get(field, "")
            values = [single] if single else []
        # getaddresses handles both "Name <addr>" and bare "addr" forms,
        # which a plain <...> regex would miss.
        pairs = getaddresses([str(v) for v in values])
        recipients.extend(addr for _name, addr in pairs if addr)
    return recipients
if __name__ == "__main__":
    # raw_input exists only on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    username = read_line("Full email address: ")
    password = getpass.getpass()

    # Connect
    mail_conn = connect(username, password)

    # Everything after login runs under try/finally so the session is
    # always logged out, even when a fetch fails part-way through.
    try:
        # Open a specific folder and get list of email message uids
        mail_conn = get_folder(mail_conn, SEARCH_FOLDER)
        if mail_conn.state != "SELECTED":
            # get_folder has already printed the reason and the known folders.
            sys.exit(1)
        msg_uid_list = get_email_ids(mail_conn)

        # Fetch a list of recipients
        all_recipients = []
        for msg_uid in msg_uid_list:
            msg = fetch_message(mail_conn, msg_uid)
            all_recipients.extend(get_recipients(msg))

        # Very unsophisticated way of showing the recipient list
        print("List of all recipients:")
        print("------------")
        pp(all_recipients)

        print("\n\n List of all UNIQUE recipients:")
        print("-------------------------------")
        pp(set(all_recipients))
    finally:
        try:
            # close() is only valid while a folder is selected.
            if mail_conn.state == "SELECTED":
                mail_conn.close()
        finally:
            mail_conn.logout()
Thanks! That's what I was looking for. Is there a way of limiting by date?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello @abought,
Thanks for this little script! I updated it to use it with python3:
Best