Skip to content

Instantly share code, notes, and snippets.

@waleedahmad
Created July 8, 2018 11:41
Show Gist options
  • Save waleedahmad/b7f088510e94746e6348e2f6ed6393de to your computer and use it in GitHub Desktop.
Save waleedahmad/b7f088510e94746e6348e2f6ed6393de to your computer and use it in GitHub Desktop.
Read and Parse emails using POP3 in Python
import poplib, getpass, email
import re
# --- POP3 session ---------------------------------------------------------
# Connect over SSL and authenticate; this happens as soon as the module runs.
# NOTE(review): the credentials are hard-coded literals. `getpass` is imported
# at the top of the file but not used in the visible code -- consider
# prompting for the password instead of storing it here.
Mailbox = poplib.POP3_SSL('mail.privateemail.com')
Mailbox.user('[email protected]')
Mailbox.pass_('pass')

# --- What counts as a bounce notification ---------------------------------
# A message is treated as a bounce when its Subject contains `subject` and
# its Date header contains `month`.
subject = "Undelivered Mail Returned to Sender"
month = "Jul 2018"

# Number of non-matching messages tolerated before the inbox scan gives up.
fail_tries = 300

# --- Mutable script state (updated by the functions below) ----------------
emails = []  # addresses loaded from emails.txt, pruned while filtering
tries = not_found = total_emails = 0
# Read dirty emails from emails.txt file
def read_emails():
    """Load the address list from ``emails.txt`` into the module globals.

    Prints a short banner describing the stop condition and the Subject /
    Month being searched for, then stores one whitespace-stripped entry per
    line of the file in ``emails`` and the number of entries in
    ``total_emails``.
    """
    global emails, total_emails

    print('Reading dirty emails...')
    print(
        'Script will exit after failing to read following Subject and Month \n'
        'in parsed emails ',
        fail_tries,
        'times',
    )
    print('Subject : ', subject, '\nMonth : ', month)

    loaded = []
    with open('emails.txt', 'r') as source:
        for raw_line in source:
            loaded.append(raw_line.strip())

    emails = loaded
    total_emails = len(loaded)
# Write filtered emails to clean_emails.txt file
def write_emails():
    """Write every address left in ``emails`` to ``clean_emails.txt``.

    The file is overwritten and receives one address per line. A context
    manager is used so the handle is closed even if a write fails, and the
    loop variable no longer shadows the imported ``email`` module.
    """
    print('Writing clean emails to file...')
    with open("clean_emails.txt", "w") as output:
        output.writelines(address + "\n" for address in emails)
    print('Done!')
# Total inbox messages
def get_total_emails():
    """Return the number of entries in the server's LIST response."""
    listing = Mailbox.list()
    # Per the poplib documentation, index 1 of the LIST result is the list
    # of per-message entries; its length is the message count.
    per_message_entries = listing[1]
    return len(per_message_entries)
# Pattern used to pull the first address-looking token out of a bounce body.
_ADDRESS_RE = re.compile(r'[\w\.-]+@[\w\.-]+')


# Return a header as text, or "" when the message does not carry it.
def _header_text(message, name):
    value = message[name]
    return "" if value is None else str(value)


# Return the first address found in the message's leading body part, or None.
def _first_address(message):
    part = message
    # Follow the first sub-part until a leaf is reached, so both plain
    # messages and (nested) multipart bounces are handled.
    while part.is_multipart():
        children = part.get_payload()
        if not children:
            return None
        part = children[0]
    body = part.get_payload()
    if not isinstance(body, str):
        return None
    found = _ADDRESS_RE.search(body)
    return found.group(0) if found else None


# Parse emails from mail text and filter dirty emails
def filter_emails(total):
    """Scan the inbox and remove bounced addresses from ``emails``.

    Messages are retrieved from message number ``total`` down to 1. A message
    counts as a bounce when its Subject contains ``subject`` and its Date
    contains ``month``; the first address found in its leading body part is
    then removed from ``emails``. Addresses that are not in the list are
    tallied in ``not_found``. Every non-matching message increments ``tries``,
    and the scan stops once ``tries`` exceeds ``fail_tries``.

    Messages with missing Subject/Date headers are treated as non-matching,
    and bounces with no extractable address are skipped instead of crashing
    the whole run.

    Args:
        total: Number of messages in the mailbox (highest message number).
    """
    global tries, not_found

    for index in reversed(range(total)):
        # POP3 message numbers are 1-based.
        raw_email = b"\n".join(Mailbox.retr(index + 1)[1])
        parsed_email = email.message_from_bytes(raw_email)

        is_bounce = (
            subject in _header_text(parsed_email, "Subject")
            and month in _header_text(parsed_email, "Date")
        )
        if not is_bounce:
            tries += 1
            if tries > fail_tries:
                print('Exiting after failing', fail_tries, 'times')
                break
            continue

        remove_email = _first_address(parsed_email)
        if remove_email is None:
            # Nothing that looks like an address in this bounce; move on.
            continue

        try:
            emails.remove(remove_email)
        except ValueError:
            # The bounced address was not in the list loaded from emails.txt.
            not_found += 1
# Run the pipeline: load the address list, prune it against the bounce
# messages in the inbox, then write what is left.
try:
    read_emails()
    filter_emails(get_total_emails())
finally:
    # The mailbox is no longer needed after filtering; always end the POP3
    # session so the connection is not left open, even if a step above fails.
    Mailbox.quit()
write_emails()

# Summary of the run.
print('Total Emails Provided : ' , total_emails)
print('Filtered Emails : ', len(emails))
print('Emails not found in emails.txt file : ', not_found)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment