Created
June 12, 2017 18:30
-
-
Save Haolicopter/ae54ad5119cb0b1c936bf109bcdd9447 to your computer and use it in GitHub Desktop.
Scans through your email account and finds all the unsubscribe links in all your emails.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Scans through your email account, | |
# finds all the unsubscribe links in all your emails,
# and automatically opens them in a browser. | |
# This program will have to log in to your email provider's IMAP server | |
# and download all of your emails. | |
# You can use BeautifulSoup to check for any instance | |
# where the word unsubscribe occurs within an HTML link tag. | |
# Once you have a list of these URLs, | |
# you can use webbrowser.open() to | |
# automatically open all of these links in a browser. | |
# You'll still have to manually go through and | |
# complete any additional steps to unsubscribe yourself from these lists. | |
# In most cases, this involves clicking a link to confirm.
import imaplib | |
import email | |
import email.header | |
import sys | |
import bs4 | |
import os | |
# --- Configuration -------------------------------------------------------
# Account credentials passed to IMAP4.login(); fill these in before running.
userEmail = ''
userEmailAppPass = ''

# Host name of the IMAP server to connect to over SSL.
emailProviderImap = 'imap.gmail.com'

# Mailbox to open. The inner double quotes are part of the value because the
# mailbox name contains a space.
userEmailFolder = '"[Gmail]/All Mail"'

# Path of the text file that collected unsubscribe links are written to.
subLinksFileName = 'unsubscribeLinks'

# We only care about this many emails.
# Set to 0 to process an unlimited number of emails.
limit = 100
# --- Connect and open the mailbox ----------------------------------------
# Open an SSL connection to the IMAP server and authenticate. A rejected
# login raises imaplib.IMAP4.error, in which case the script exits with
# status 1.
mail = imaplib.IMAP4_SSL(emailProviderImap)
try:
    rv, data = mail.login(userEmail, userEmailAppPass)
except imaplib.IMAP4.error:
    print("LOGIN FAILED!!! ")
    sys.exit(1)

# Select the mailbox read-only so that scanning never modifies any message.
rv, data = mail.select(userEmailFolder, readonly=True)
if rv != 'OK':
    print('ERROR: Unable to open mailbox ', rv)
    mail.logout()
    sys.exit(1)
print('Processing mailbox...')
# --- Search the mailbox and collect unsubscribe links ---------------------
# TODO: add multi filters to search
# for example SENTSINCE


def _text_bodies(msg):
    """Yield the decoded text of every ``text/*`` part of *msg*.

    ``msg.walk()`` visits nested multipart containers, and
    ``get_payload(decode=True)`` undoes the base64 / quoted-printable
    transfer encoding, so each yielded body is ready to be parsed as markup.
    """
    for part in msg.walk():
        if part.get_content_maintype() != 'text':
            continue
        raw = part.get_payload(decode=True)
        if raw is None:
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            yield raw.decode(charset, errors='replace')
        except LookupError:
            # The message declared a charset Python does not know.
            yield raw.decode('utf-8', errors='replace')


def _unsubscribe_hrefs(msg):
    """Yield the href of every unsubscribe link found in *msg*.

    A link qualifies when the word "unsubscribe" appears, in any letter
    case, in the anchor's visible text or in its href.
    """
    for body in _text_bodies(msg):
        soup = bs4.BeautifulSoup(body, "html.parser")
        for link in soup.find_all('a', href=True):
            href = link['href'].strip()
            if not href:
                continue
            if ('unsubscribe' in link.get_text().lower()
                    or 'unsubscribe' in href.lower()):
                yield href


emailCount = 0
unsubLinks = []
# Set mirror of unsubLinks, so the duplicate check is O(1) per link.
seenLinks = set()

try:
    rv, data = mail.search(None, "TEXT unsubscribe")
    if rv == 'OK':
        messageIds = data[0].split()
    else:
        print('ERROR: Unable to search mailbox ', rv)
        messageIds = []

    # The context manager guarantees the file is closed even if the loop
    # raises.
    with open(subLinksFileName, 'w', encoding='utf-8') as subLinksFile:
        for num in messageIds:
            # ">=" so that exactly `limit` emails are processed.
            if limit != 0 and emailCount >= limit:
                print('Reached the maximum of emails we want to process')
                break
            print('Processing email id = ', num, '...', sep='')
            try:
                rv, fetched = mail.fetch(num, '(RFC822)')
                # A successful fetch yields (envelope, raw_bytes) as the
                # first item; anything else means there is no message body.
                if (rv != 'OK' or not fetched
                        or not isinstance(fetched[0], tuple)):
                    print("ERROR getting message", num)
                    continue
                msg = email.message_from_bytes(fetched[0][1])
                for href in _unsubscribe_hrefs(msg):
                    if href in seenLinks:
                        continue
                    seenLinks.add(href)
                    unsubLinks.append(href)
                    print('Found new link to unsubscribe from: ', href)
                    # Write each link to disk immediately so partial results
                    # are kept. Text mode translates '\n' to the platform
                    # line ending itself.
                    subLinksFile.write(href + '\n')
            except Exception as e:
                # Deliberate best effort: one malformed email must not
                # abort the scan of the remaining ones.
                print('We encountered an exception but we decided to keep going.')
                print('The exception was: ', e)
            emailCount += 1

    print('Emails processed: ', emailCount)
    print('Unsubscribe links found: ', len(unsubLinks))
finally:
    # Always release the IMAP session, even when processing failed.
    try:
        mail.close()
    finally:
        mail.logout()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment