Created
July 17, 2017 11:36
-
-
Save bhochieng/fc21a8e22bc46d47dc94d86472de1178 to your computer and use it in GitHub Desktop.
Fetch emails from Gmail using Python (modified code)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import email | |
import cPickle | |
import getpass | |
import imaplib | |
import datetime | |
# Host name passed to imaplib.IMAP4_SSL() in main().
IMAP_SERVER = 'imap.gmail.com'
# Pickle file remembering which message numbers were already handled.
_ATTACHMENT_STATE_PATH = './pickled_attachments.pkl'

# Sample attachment MIME types that are saved to disk; more can be added.
_ATTACHMENT_MIME_TYPES = frozenset([
    'image/jpeg', 'image/png', 'image/bmp', 'application/msword',
    'image/gif', 'image/x-icon', 'video/x-mpeg',
    'application/mspowerpoint', 'application/pdf',
])


def _load_attachment_state():
    """Return the list of message numbers recorded by earlier runs ([] if none)."""
    if not os.path.exists(_ATTACHMENT_STATE_PATH):
        return []
    # NOTE(review): unpickling can execute arbitrary code, so this is only
    # safe while the state file is written exclusively by this script.
    with open(_ATTACHMENT_STATE_PATH, 'r') as state_file:
        return cPickle.load(state_file)


def _save_attachment_state(processed):
    """Persist the list of handled message numbers to the state file."""
    with open(_ATTACHMENT_STATE_PATH, 'w') as state_file:
        cPickle.dump(processed, state_file)


def _attachment_name(part, message_id, index):
    """Return a file name for *part* that contains no directory component."""
    import mimetypes  # local import so the file's import block stays untouched

    raw_name = part.get_filename()
    if raw_name:
        # The name is chosen by the sender: keep only its last path component
        # so a value such as "../../x" cannot leave the attachment directory.
        safe_name = os.path.basename(raw_name.replace("\\", "/"))
        if safe_name not in ("", ".", ".."):
            return safe_name
    # No usable name in the message: build one instead of writing to "None".
    extension = mimetypes.guess_extension(part.get_content_type()) or ""
    return "%s-part%d%s" % (message_id, index, extension)


def _save_attachments(message, message_id, target_dir):
    """Write every part of *message* with a listed MIME type into *target_dir*."""
    for index, part in enumerate(message.walk()):
        if part.get_content_type() not in _ATTACHMENT_MIME_TYPES:
            continue
        body = part.get_payload(decode=True)
        if body is None:
            continue
        save_path = os.path.join(
            target_dir, _attachment_name(part, message_id, index))
        with open(save_path, 'wb') as attachment_file:
            attachment_file.write(body)


def process_inbox(mail, email_address):
    """Save attachments of all not-yet-handled messages in the selected mailbox.

    Args:
        mail: object offering ``search`` and ``fetch`` as used below
            (main() passes an ``imaplib.IMAP4_SSL`` connection).
        email_address: used to build the target directory
            ``./<email_address>_attachments``, which must already exist.

    Handled message numbers are appended to ``./pickled_attachments.pkl``
    after every message, so an interrupted run resumes where it stopped.
    Returns None.  An error while handling a message is reported and stops
    the run instead of being swallowed silently.
    """
    return_value, emails = mail.search(None, "ALL")
    if return_value != 'OK':
        print("No messages.")
        return

    processed = _load_attachment_state()
    # Set copy gives O(1) "already handled?" checks; the list is what is saved.
    already_done = set(processed)
    target_dir = "./" + email_address + "_attachments"

    # NOTE(review): SEARCH yields message sequence numbers, which a server may
    # renumber after deletions; IMAP UIDs would be a more stable key.
    try:
        for my_mail in emails[0].split():
            if my_mail in already_done:
                continue
            return_value, raw_data = mail.fetch(my_mail, '(RFC822)')
            if return_value != 'OK':
                print("Error getting message %s" % my_mail)
                return
            message = email.message_from_string(raw_data[0][1])
            _save_attachments(message, my_mail, target_dir)
            processed.append(my_mail)
            already_done.add(my_mail)
            _save_attachment_state(processed)
    except Exception as error:
        # Previously a bare "except:" hid every failure; report it instead.
        print("Stopped processing messages: %s" % error)
    finally:
        # Always leave a state file behind, even if nothing was processed.
        _save_attachment_state(processed)
def get_inbox(mail):
    """Select the "INBOX" mailbox on *mail* and return the response status.

    Only the first element of the two-element ``select`` result is returned
    (main() compares it with "OK"); the second element is discarded.
    """
    status, _mailbox_data = mail.select("INBOX")
    return status
def main():
    """Log in to the IMAP server interactively and save new attachments.

    Prompts for an e-mail address and password until the login succeeds,
    makes sure ``./<email_address>_attachments`` exists, then runs
    process_inbox() on the INBOX.  The mailbox is closed and the session
    logged out even when processing raises.
    """
    mail = imaplib.IMAP4_SSL(IMAP_SERVER)
    try:
        # Keep asking for credentials until the server accepts them.
        while True:
            email_address = raw_input('Email:')
            try:
                mail.login(email_address, getpass.getpass())
                break
            except imaplib.IMAP4.error:
                print("Login failed.")

        attachment_dir = "./" + email_address + "_attachments"
        if not os.path.exists(attachment_dir):
            os.makedirs(attachment_dir)

        if get_inbox(mail) == "OK":
            try:
                process_inbox(mail, email_address)
            finally:
                mail.close()
    finally:
        mail.logout()
# Run only when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import email | |
import cPickle | |
import getpass | |
import imaplib | |
import datetime | |
# Host name passed to imaplib.IMAP4_SSL() in main().
IMAP_SERVER = 'imap.gmail.com'
# Pickle file remembering which message numbers were already handled.
_EMAIL_STATE_PATH = './pickled_emails.pkl'


def _load_email_state():
    """Return the list of message numbers recorded by earlier runs ([] if none)."""
    if not os.path.exists(_EMAIL_STATE_PATH):
        return []
    # NOTE(review): unpickling can execute arbitrary code, so this is only
    # safe while the state file is written exclusively by this script.
    with open(_EMAIL_STATE_PATH, 'r') as state_file:
        return cPickle.load(state_file)


def _save_email_state(processed):
    """Persist the list of handled message numbers to the state file."""
    with open(_EMAIL_STATE_PATH, 'w') as state_file:
        cPickle.dump(processed, state_file)


def _message_date_label(message):
    """Return the Date header as local "%d %b %Y %H:%M:%S", or "unknown date"."""
    raw_date = message['Date']
    date_tuple = email.utils.parsedate_tz(raw_date) if raw_date else None
    if date_tuple is None:
        # Header missing or unparsable: do not abort the whole run for it.
        return "unknown date"
    try:
        local_date = datetime.datetime.fromtimestamp(
            email.utils.mktime_tz(date_tuple))
    except (ValueError, OverflowError, OSError):
        return "unknown date"
    return local_date.strftime("%d %b %Y %H:%M:%S")


def _save_text_parts(message, message_id, target_dir):
    """Append every text/plain part of *message* to one file in *target_dir*."""
    # "/" in the subject would be read as a directory separator, so it is
    # replaced on every run (the first-run path used to skip this).
    subject = str(message['Subject']).replace("/", "-")
    save_path = (target_dir + "/" + str(message_id) + ".("
                 + _message_date_label(message) + ")-" + subject + ".txt")
    for part in message.walk():
        if part.get_content_type() != "text/plain":
            continue
        body = part.get_payload(decode=True)
        if body is None:
            continue
        # Append mode: several text/plain parts end up in the same file.
        with open(save_path, 'a') as text_file:
            text_file.write(body)


def process_inbox(mail, email_address):
    """Save the plain-text body of all not-yet-handled messages.

    Args:
        mail: object offering ``search`` and ``fetch`` as used below
            (main() passes an ``imaplib.IMAP4_SSL`` connection).
        email_address: used to build the target directory
            ``./<email_address>_data``, which must already exist.

    Each message is written to
    ``<number>.(<date>)-<subject>.txt``.  Handled message numbers are
    appended to ``./pickled_emails.pkl`` after every message, so an
    interrupted run resumes where it stopped.  Returns None.  An error while
    handling a message is reported and stops the run instead of being
    swallowed silently.
    """
    return_value, emails = mail.search(None, "ALL")
    if return_value != 'OK':
        print("No messages.")
        return

    processed = _load_email_state()
    # Set copy gives O(1) "already handled?" checks; the list is what is saved.
    already_done = set(processed)
    target_dir = "./" + email_address + "_data"

    # NOTE(review): SEARCH yields message sequence numbers, which a server may
    # renumber after deletions; IMAP UIDs would be a more stable key.
    try:
        for my_mail in emails[0].split():
            if my_mail in already_done:
                continue
            return_value, raw_data = mail.fetch(my_mail, '(RFC822)')
            if return_value != 'OK':
                print("Error getting message %s" % my_mail)
                return
            message = email.message_from_string(raw_data[0][1])
            _save_text_parts(message, my_mail, target_dir)
            processed.append(my_mail)
            already_done.add(my_mail)
            _save_email_state(processed)
    except Exception as error:
        # Previously a bare "except:" hid every failure; report it instead.
        print("Stopped processing messages: %s" % error)
    finally:
        # Always leave a state file behind, even if nothing was processed.
        _save_email_state(processed)
def get_inbox(mail):
    """Select the "INBOX" mailbox on *mail* and return the response status.

    Only the first element of the two-element ``select`` result is returned
    (main() compares it with "OK"); the second element is discarded.
    """
    status, _mailbox_data = mail.select("INBOX")
    return status
def main():
    """Log in to the IMAP server interactively and save new message bodies.

    Prompts for an e-mail address and password until the login succeeds,
    makes sure ``./<email_address>_data`` exists, then runs process_inbox()
    on the INBOX.  The mailbox is closed and the session logged out even
    when processing raises.
    """
    mail = imaplib.IMAP4_SSL(IMAP_SERVER)
    try:
        # Keep asking for credentials until the server accepts them.
        while True:
            email_address = raw_input('Email:')
            try:
                mail.login(email_address, getpass.getpass())
                break
            except imaplib.IMAP4.error:
                print("Login failed.")

        data_dir = "./" + email_address + "_data"
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)

        if get_inbox(mail) == "OK":
            try:
                process_inbox(mail, email_address)
            finally:
                mail.close()
    finally:
        mail.logout()
# Run only when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment