Last active
September 27, 2022 18:54
-
-
Save stefansundin/a99bbfb6cda873d14fd2 to your computer and use it in GitHub Desktop.
Extract attachments from emails that Gmail doesn't allow you to download. This is dumb. Please use Python >= 3.4.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Get the files that Gmail blocks. Warning message: | |
# "Anti-virus warning - 1 attachment contains a virus or blocked file. Downloading this attachment is disabled." | |
# Based on: https://spapas.github.io/2014/10/23/retrieve-gmail-blocked-attachments/ | |
# Instructions: | |
# Go to your emails, click the arrow button in the top right, "Show original", then "Download Original". | |
# Move the files to the same directory as this program, then run it. | |
import sys | |
import os | |
import email | |
import mailbox | |
from email import policy | |
from email.parser import BytesParser | |
# To automatically rename conflicting files, change this to True: (it adds a prefix to the filename, see get_new_filename below) | |
automatic_rename = False | |
def get_new_filename(old_fn):
    """Return a variant of ``old_fn`` that does not exist on disk yet.

    A numeric prefix ("1-", "2-", ...) is added to the base name of
    ``old_fn``; the directory part, if any, is kept in front of it. The
    counter starts at 1 and increases until an unused name is found.

    Args:
        old_fn: The conflicting file name, with or without a directory part.

    Returns:
        The first candidate path for which nothing exists on disk.
    """
    # Prefix only the base name: prefixing the whole string would turn
    # "dir/a.txt" into "1-dir/a.txt", which points somewhere else entirely.
    directory, base = os.path.split(old_fn)
    i = 1
    while True:
        new_fn = os.path.join(directory, "%d-%s" % (i, base))
        # exists() rather than isfile(): a directory with the candidate name
        # would also make the subsequent open(..., 'wb') fail.
        if not os.path.exists(new_fn):
            return new_fn
        i += 1
def sanitize_attachment_name(raw_name):
    """Reduce an attachment file name to a bare name for the current directory.

    Attachment names come from the email itself and are therefore untrusted;
    any directory component is dropped so that a name such as "../../x"
    cannot be written outside the working directory.

    Args:
        raw_name: The file name as reported by the message part.

    Returns:
        The base name, or None if nothing usable is left.
    """
    # Treat backslashes as separators too, so Windows-style paths are also
    # stripped when running on POSIX.
    name = os.path.basename(raw_name.replace('\\', '/'))
    if name in ('', '.', '..'):
        return None
    return name


def extract_attachments(msg):
    """Write every named attachment of ``msg`` into the current directory.

    Walks the whole MIME tree, so attachments inside nested multipart
    containers are found and a non-multipart message is handled as well.
    If a target file already exists, it is either renamed via
    get_new_filename (when automatic_rename is true) or overwritten after
    the user presses enter.

    Args:
        msg: A parsed email message object.
    """
    print("Subject: %s" % msg['Subject'])
    for part in msg.walk():
        # Container parts carry no file data of their own; their children
        # are visited by walk().
        if part.is_multipart():
            continue
        raw_name = part.get_filename()
        if not raw_name:
            continue
        fn = sanitize_attachment_name(raw_name)
        if fn is None:
            print("Skipping attachment with unusable filename: %r" % raw_name)
            continue
        print("Found %s" % fn)
        if os.path.isfile(fn):
            print("The file '%s' already exists!" % fn)
            if automatic_rename:
                fn = get_new_filename(fn)
                print("Changed the filename to: %s" % fn)
            else:
                print("Press enter to overwrite.")
                input()
        # Context manager so the data is flushed and the handle closed
        # before moving on to the next attachment.
        with open(fn, 'wb') as out:
            out.write(part.get_payload(decode=True))
    print()


if __name__ == '__main__':
    if sys.version_info[0] < 3:
        print("Please use Python 3.")
        sys.exit(1)

    if len(sys.argv) < 2:
        # No arguments: offer every candidate file in the current directory.
        files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith(('.txt', '.eml', '.mbox'))]
        if not files:
            print("Please download the emails and put them in this directory with a .txt, .eml, or .mbox extension.")
            sys.exit(1)
        print("Here are the files in the current directory with .txt, .eml, or .mbox extension:")
        print('\n'.join(files))
        print()
        print("Press enter to extract attachments from these files.")
        input()
    else:
        files = sys.argv[1:]

    for f in files:
        print("Processing %s" % f)
        if f.endswith('.mbox'):
            messages = mailbox.mbox(f, factory=BytesParser(policy=policy.default).parse, create=False)
            try:
                print("%d messages." % len(messages))
                print()
                for msg in messages:
                    extract_attachments(msg)
            finally:
                # Release the mailbox file even if extraction fails midway.
                messages.close()
        else:
            # Parse from bytes: text mode would decode with the locale
            # encoding and can fail or corrupt non-ASCII messages.
            with open(f, 'rb') as fp:
                msg = email.message_from_binary_file(fp, policy=policy.default)
            extract_attachments(msg)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Made some updates:
- Downloaded emails may now be named `.eml` and not `.txt`, so the script now discovers these files as well.
- Messages are parsed with `policy.default`. Unicode in subject lines should decode properly now.
- Added support for the `.mbox` format. This is the format that Google Takeout gives you the emails in. (I used https://gist.github.com/benwattsjones/060ad83efd2b3afc8b229d41f9b246c4 as a reference.)
- Conflicting filenames can be renamed automatically (set `automatic_rename = True`).

Enjoy!
Anyone still using this? :)