Skip to content

Instantly share code, notes, and snippets.

@s3rgeym
Last active August 16, 2024 08:25
Show Gist options
  • Save s3rgeym/b7c6cec188430b6b57dec71a5240c277 to your computer and use it in GitHub Desktop.
Save s3rgeym/b7c6cec188430b6b57dec71a5240c277 to your computer and use it in GitHub Desktop.
Dump emails via IMAP
#!/usr/bin/env python
import argparse
import email
import imaplib
import pathlib
import re
import ssl
from typing import Optional, Sequence
# Default port used when --ssl is given and no --port is supplied.
IMAP_SSL_PORT = 993
# Default port used for a non-SSL connection when no --port is supplied.
IMAP_PORT = 143
def imap_utf7_decode(data: bytes) -> str:
    """
    Decode a mailbox name from IMAP's modified UTF-7 encoding.

    In modified UTF-7 only the runs between ``&`` and ``-`` are encoded
    (base64 of UTF-16BE, with ``,`` standing in for ``/``); ``&-`` is a
    literal ampersand and every other byte, including ``+`` and ``,``, is
    plain ASCII. Only the encoded runs are therefore handed to the standard
    UTF-7 codec; the rest is passed through untouched.

    Args:
        data: The raw mailbox name as sent by the server.

    Returns:
        The decoded mailbox name.

    Raises:
        UnicodeDecodeError: If the name contains non-ASCII bytes or an
            encoded run that is not valid UTF-7.

    Example:
        >>> imap_utf7_decode(b'&BCMENAQwBDsENQQ9BD0ESwQ1-')
        'Удаленные'
        >>> imap_utf7_decode(b'Tom &- Jerry')
        'Tom & Jerry'
    """
    pieces = []
    pos = 0
    while pos < len(data):
        shift = data.find(b"&", pos)
        if shift == -1:
            # No further encoded runs: the remainder is plain ASCII.
            pieces.append(data[pos:].decode("ascii"))
            break
        pieces.append(data[pos:shift].decode("ascii"))
        end = data.find(b"-", shift + 1)
        if end == -1:
            # Unterminated run: be lenient and treat the rest as encoded.
            end = len(data)
        encoded = data[shift + 1:end]
        if encoded:
            # Translate the run into standard UTF-7 before decoding it.
            standard = b"+" + encoded.replace(b",", b"/") + b"-"
            pieces.append(standard.decode("utf-7"))
        else:
            # "&-" is the escape for a literal ampersand.
            pieces.append("&")
        pos = end + 1
    return "".join(pieces)
def sanitize_subject(name: str) -> str:
    """
    Make a string safe to use as part of a filename.

    Every run of characters that Windows or Linux forbid in filenames, of
    control characters, or of whitespace is collapsed into one underscore;
    underscores at either end of the result are then removed.
    """
    unsafe_run = re.compile(r'[<>:"/\\|?*\x00-\x1F\s]+')
    collapsed = unsafe_run.sub("_", name)
    return collapsed.strip("_")
def save_eml(
    mail: imaplib.IMAP4, mail_id: str, output_dir: pathlib.Path
) -> None:
    """
    Fetch one message and save it in EML format to the specified directory.

    The file is named ``<mail_id>_<sanitized subject>.eml``. The subject is
    decoded from its RFC 2047 form (``=?charset?B?...?=``) first so the
    filename is readable, and is capped in length so that long subjects do
    not exceed the filesystem's filename limit.

    Args:
        mail: A connection with a mailbox already selected.
        mail_id: Sequence number of the message in the selected mailbox.
        output_dir: Existing directory the ``.eml`` file is written to.

    Returns:
        None. A failed fetch is reported on stdout and skipped.
    """
    from email.errors import MessageError
    from email.header import decode_header, make_header

    # Conservative cap: even at 4 bytes per character this stays well below
    # the 255-byte filename limit common to most filesystems.
    max_subject_chars = 60

    result, data = mail.fetch(mail_id, "(RFC822)")
    # The server may answer OK without a message (e.g. it was expunged in
    # the meantime), in which case imaplib returns [None].
    if result != "OK" or not data or not isinstance(data[0], tuple):
        print(f"Failed to fetch email: {mail_id}")
        return

    raw_email = data[0][1]
    message = email.message_from_bytes(raw_email)

    raw_subject = message["subject"]
    if raw_subject:
        try:
            subject = str(make_header(decode_header(raw_subject)))
        except (LookupError, ValueError, MessageError):
            # Unknown charset or malformed encoded word: use it undecoded.
            subject = str(raw_subject)
    else:
        subject = "No Subject"

    safe_subject = sanitize_subject(subject)[:max_subject_chars].strip("_")
    filepath = output_dir / f"{mail_id}_{safe_subject}.eml"
    filepath.write_bytes(raw_email)
    print(f"Email saved as {filepath}")
# One LIST response line has the form: (flags) delimiter name
# where delimiter is a quoted string or NIL and name is an atom or a
# quoted string.
_LIST_RESPONSE_RE = re.compile(
    rb'\([^)]*\)\s+(?:"(?:\\.|[^"\\])*"|NIL)\s+(?P<name>.*)', re.DOTALL
)


def _extract_mailbox_name(item) -> Optional[bytes]:
    """Return the raw (still encoded) mailbox name from one LIST item."""
    if not item:
        # imaplib yields None / b"" when there is nothing to report.
        return None
    if isinstance(item, tuple):
        # The name was sent as an IMAP literal: (header, literal bytes).
        return item[1] if len(item) > 1 and item[1] else None
    match = _LIST_RESPONSE_RE.match(item)
    if match is None:
        return None
    name = match.group("name").strip()
    if len(name) >= 2 and name[:1] == b'"' and name[-1:] == b'"':
        # Quoted string: drop the quotes and undo backslash escapes.
        name = re.sub(rb"\\(.)", rb"\1", name[1:-1])
    return name or None


def _quote_mailbox(raw_name: bytes) -> bytes:
    """Wrap a raw mailbox name in an IMAP quoted string for use in commands."""
    escaped = raw_name.replace(b"\\", b"\\\\").replace(b'"', b'\\"')
    return b'"' + escaped + b'"'


def _mailbox_directory(
    output_dir: pathlib.Path, mailbox_name: str
) -> pathlib.Path:
    """Map a mailbox name to a directory that stays inside output_dir."""
    # The name comes from the server; drop empty, "." and ".." components so
    # an absolute or parent-relative name cannot escape output_dir.
    parts = [
        part
        for part in mailbox_name.replace("\\", "/").split("/")
        if part not in ("", ".", "..")
    ]
    return output_dir.joinpath(*parts) if parts else output_dir / "_"


def dump_emails(
    server: str,
    port: int,
    username: str,
    password: str,
    use_ssl: bool,
    output_dir: pathlib.Path,
) -> None:
    """
    Retrieve every message from every mailbox and save it under output_dir.

    Each mailbox gets its own sub-directory named after the decoded mailbox
    name. Mailboxes are opened read-only so that downloading does not mark
    messages as read on the server.

    Args:
        server: IMAP server host name.
        port: TCP port to connect to.
        username: Login name.
        password: Login password.
        use_ssl: Connect with IMAP4_SSL instead of plain IMAP4.
        output_dir: Root directory for the saved messages.

    Returns:
        None. Mailboxes that cannot be opened or searched are reported on
        stdout and skipped.
    """
    if use_ssl:
        context = ssl.create_default_context()
        mail = imaplib.IMAP4_SSL(server, port, ssl_context=context)
    else:
        mail = imaplib.IMAP4(server, port)

    try:
        mail.login(username, password)

        result, mailboxes = mail.list()
        if result != "OK":
            print("Failed to retrieve the list of mailboxes.")
            return

        for item in mailboxes:
            raw_name = _extract_mailbox_name(item)
            if raw_name is None:
                continue

            try:
                mailbox_name = imap_utf7_decode(raw_name)
            except UnicodeError:
                # Name is not valid modified UTF-7; keep a readable form.
                mailbox_name = raw_name.decode("utf-8", errors="replace")

            # The server must be given the name exactly as it sent it
            # (still encoded), quoted so that spaces survive.
            result, _ = mail.select(_quote_mailbox(raw_name), readonly=True)
            if result != "OK":
                print(f"Failed to open mailbox: {mailbox_name}")
                continue

            result, data = mail.search(None, "ALL")
            if result != "OK":
                print(f"Failed to retrieve emails for mailbox: {mailbox_name}")
                continue

            mail_ids = data[0].decode().split() if data and data[0] else []
            mailbox_dir = _mailbox_directory(output_dir, mailbox_name)
            mailbox_dir.mkdir(parents=True, exist_ok=True)
            for mail_id in mail_ids:
                save_eml(mail, mail_id, mailbox_dir)
    finally:
        # Always release the connection; a failure while logging out must
        # not hide the error (if any) that brought us here.
        try:
            mail.logout()
        except (imaplib.IMAP4.error, OSError):
            pass
def main(argv: Optional[Sequence[str]] = None) -> None:
    """
    Parse the command line and dump all emails from the given account.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    The password may be given with ``-p``/``--password``; when it is
    omitted it is prompted for without echo, which keeps it out of the
    process list and the shell history.
    """
    import getpass

    parser = argparse.ArgumentParser(
        description="Utility to download all emails from a mailbox in EML format."
    )
    parser.add_argument(
        "-s",
        "--server",
        required=True,
        help="IMAP server address (e.g., imap.gmail.com)",
    )
    parser.add_argument(
        "-u", "--username", required=True, help="Username or email address"
    )
    parser.add_argument(
        "-p",
        "--password",
        default=None,
        help="Password for the mailbox (prompted for if omitted)",
    )
    parser.add_argument(
        "-o",
        "--output-dir",
        required=True,
        type=pathlib.Path,
        help="Directory to save emails",
    )
    parser.add_argument(
        "--port", type=int, default=None, help="Port for the connection"
    )
    parser.add_argument(
        "--ssl", action="store_true", help="Use SSL for the connection"
    )
    args = parser.parse_args(argv)

    password = args.password
    if password is None:
        password = getpass.getpass(f"Password for {args.username}: ")

    # Without an explicit --port, pick the default matching the transport.
    default_port = IMAP_SSL_PORT if args.ssl else IMAP_PORT
    dump_emails(
        args.server,
        args.port or default_port,
        args.username,
        password,
        args.ssl,
        args.output_dir,
    )


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment