Created
July 8, 2018 11:41
-
-
Save waleedahmad/b7f088510e94746e6348e2f6ed6393de to your computer and use it in GitHub Desktop.
Read and Parse emails using POP3 in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import poplib, getpass, email | |
import re | |
# --- Filter settings ---------------------------------------------------------
# A message counts as a bounce when its Subject contains `subject` and its
# Date header contains `month`.
subject = "Undelivered Mail Returned to Sender"
month = "Jul 2018"
# Number of non-matching messages tolerated before the scan gives up.
fail_tries = 300

# --- Run-time state (rebound by the functions below through `global`) --------
emails = []        # addresses loaded from emails.txt
total_emails = 0   # how many addresses were loaded
tries = 0          # non-matching messages seen so far
not_found = 0      # bounced addresses that were not in `emails`

# --- POP3 session ------------------------------------------------------------
# NOTE(review): the account name and password are hard-coded here; `getpass`
# is imported at the top of the file but never used - confirm whether an
# interactive prompt was intended.
Mailbox = poplib.POP3_SSL(host='mail.privateemail.com')
Mailbox.user('[email protected]')
Mailbox.pass_('pass')
# Read dirty emails from emails.txt file
def read_emails():
    """Load the candidate addresses from ``emails.txt`` into ``emails``.

    Prints the active filter settings, then reads the file line by line.
    Surrounding whitespace is stripped from every line, and lines that are
    empty after stripping are skipped so they are neither counted nor written
    back out later.

    Side effects:
        Rebinds the module-level ``emails`` list and sets ``total_emails``
        to its length.

    Raises:
        OSError: if ``emails.txt`` cannot be opened.
    """
    global emails, total_emails
    print('Reading dirty emails...')
    print('Script will exit after failing to read following Subject and Month \n'
          'in parsed emails ', fail_tries, 'times')
    print('Subject : ', subject, '\nMonth : ', month)
    with open('emails.txt', 'r') as f:
        stripped = (line.strip() for line in f)
        # A blank line is not an address; keeping it would inflate the totals.
        emails = [address for address in stripped if address]
    total_emails = len(emails)
# Write filtered emails to clean_emails.txt file
def write_emails():
    """Write every address left in ``emails`` to ``clean_emails.txt``.

    The file is created or truncated, and each address is written on its own
    line. A ``with`` block guarantees the handle is closed even if a write
    fails part-way through.

    Raises:
        OSError: if ``clean_emails.txt`` cannot be opened or written.
    """
    print('Writing clean emails to file...')
    with open("clean_emails.txt", "w") as out:
        # `address`, not `email`, so the imported `email` module is not shadowed.
        out.writelines(address + "\n" for address in emails)
    print('Done!')
# Total inbox messages
def get_total_emails():
    """Return the number of entries in the mailbox's LIST response."""
    reply = Mailbox.list()
    # The second element of the reply holds the per-message listing lines.
    message_lines = reply[1]
    return len(message_lines)
def _bounced_address(message):
    """Return the first e-mail address in the message's leading part, or None."""
    part = message
    # Follow the first sub-part until a leaf is reached; a non-multipart
    # message is already a leaf.
    while part.is_multipart():
        parts = part.get_payload()
        if not parts:
            return None
        part = parts[0]
    body = part.get_payload()
    if not isinstance(body, str):
        return None
    match = re.search(r'[\w\.-]+@[\w\.-]+', body)
    return match.group(0) if match else None


# Parse emails from mail text and filter dirty emails
def filter_emails(total):
    """Remove bounced addresses from ``emails`` by scanning the mailbox.

    Messages are retrieved from number ``total`` down to 1. A message whose
    Subject contains ``subject`` and whose Date contains ``month`` is treated
    as a bounce: the first address found in its leading body part is removed
    from ``emails``. If that address is not in the list, ``not_found`` is
    incremented. A bounce with no recognisable address is skipped.

    Any other message (including one lacking a Subject or Date header)
    increments ``tries``; once ``tries`` exceeds ``fail_tries`` the scan stops.

    Args:
        total: number of messages in the mailbox (highest message number).

    Side effects:
        Mutates ``emails`` in place and updates ``tries`` and ``not_found``.
    """
    global tries, not_found
    for number in range(total, 0, -1):
        raw_email = b"\n".join(Mailbox.retr(number)[1])
        parsed_email = email.message_from_bytes(raw_email)
        # A header can be absent (None) or come back as a Header object;
        # normalise to str so the substring tests cannot raise.
        mail_subject = str(parsed_email["Subject"] or "")
        mail_date = str(parsed_email["Date"] or "")
        if subject not in mail_subject or month not in mail_date:
            tries += 1
            if tries > fail_tries:
                print('Exiting after failing', fail_tries, 'times')
                break
            continue
        address = _bounced_address(parsed_email)
        if address is None:
            continue
        try:
            emails.remove(address)
        except ValueError:
            # The bounced address was never in the provided list.
            not_found += 1
# Script driver: load the addresses, drop the bounced ones, save the rest.
read_emails()
try:
    filter_emails(get_total_emails())
finally:
    # Always end the POP3 session, even if filtering fails part-way through.
    try:
        Mailbox.quit()
    except (poplib.error_proto, OSError):
        # QUIT itself failed; still release the socket.
        Mailbox.close()
write_emails()
print('Total Emails Provided : ' , total_emails)
print('Filtered Emails : ', len(emails))
print('Emails not found in emails.txt file : ', not_found)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment