Skip to content

Instantly share code, notes, and snippets.

@runapp
Created December 20, 2019 08:14
Show Gist options
  • Save runapp/a50de5bd4676247d9b4b9e7d987e3c71 to your computer and use it in GitHub Desktop.
Save runapp/a50de5bd4676247d9b4b9e7d987e3c71 to your computer and use it in GitHub Desktop.
Extract all email addresses from an .eml file, decoding all UTF-8 encoded names.
import re
# Matches one `Name <user@host>` mailbox.
#   name  - either a double-quoted string (quotes included in the group) or a
#           run of characters free of '<', '>', '"', ',' and line breaks, so an
#           unquoted multi-word name is captured whole; callers are expected
#           to strip surrounding whitespace/quotes.
#   email - the text between the angle brackets; it may not contain '<', '>'
#           or whitespace, so the match stops at the first closing '>' rather
#           than running on to the last one on a multi-recipient line.
NAME_PATTERN = re.compile(
    r'(?P<name>"[^"]*"|[^<>",\r\n]+?)\s*<(?P<email>[^<>\s@]+@[^<>\s@]+)>'
)
# Matches an encoded word of the form `=?charset?encoding?text?=`.
# NOTE(review): every field is a greedy `.*`, so a string holding several
# encoded words is split at the wrong '?' separators.
ENCODED_PATTERN = re.compile(r'=\?(?P<charset>.*)\?(?P<encoding>.*)\?(?P<encodedtext>.*)\?=')
import sys
def eprint(*args, **kwargs):
    """Print to standard error.

    Accepts the same positional and keyword arguments as the built-in
    ``print``; the ``file`` argument is fixed to ``sys.stderr``.
    """
    print(*args, file=sys.stderr, **kwargs)
import base64
# Header field names whose values are scanned for addresses. Each one is
# compared against the text that precedes the first ':' on a line.
heads = {"From", "To", "Cc"}
def decode_email_name(s):
    """Decode a display name that may contain RFC 2047 encoded words.

    Handles both ``B`` (Base64) and ``Q`` (quoted-printable) encoded words,
    any charset Python has a codec for, case-insensitive charset/encoding
    tags, and names made of several encoded words.

    Args:
        s: The raw display name, e.g. ``"=?UTF-8?B?SGVsbG8=?="``.

    Returns:
        The decoded name. A name without encoded words is returned as is.
        If decoding fails (unknown charset, malformed payload, bytes that are
        invalid for the charset) a message is written to stderr and ``s`` is
        returned unchanged.
    """
    # Imported here so the function does not depend on extra module-level
    # imports being present.
    from email.errors import HeaderParseError
    from email.header import decode_header, make_header

    try:
        return str(make_header(decode_header(s)))
    except (HeaderParseError, LookupError, UnicodeError) as exc:
        # Best effort: keep the raw text rather than abort the whole scan.
        print(f'Cannot decode {s!r}: {exc}', file=sys.stderr)
        return s
# Scan the message line by line and print every `Name <address>` pair found in
# the value of a header field listed in `heads`, including folded
# continuation lines of that field.
cur_tag = ''
# Header field names are case-insensitive, so compare case-folded.
wanted_tags = {h.casefold() for h in heads}
# `with` closes the file deterministically; an explicit encoding with
# errors="replace" keeps a stray 8-bit byte from aborting the scan.
with open("XXXX.eml", encoding="utf-8", errors="replace") as eml:
    for line in eml:
        if line[0] not in ' \t':
            # Not a folded continuation (those start with a space or a tab),
            # so this line either opens a new field or is not a header line.
            # partition() avoids the str.find() trap where "not found" (-1)
            # is truthy and "found at index 0" is falsy.
            tag, colon, rest = line.partition(':')
            if colon and tag:
                cur_tag, line = tag, rest
            else:
                cur_tag = ''
        if cur_tag.casefold() not in wanted_tags:
            continue
        # finditer() so every mailbox on a multi-recipient line is reported.
        for m in NAME_PATTERN.finditer(line):
            name, email = m.group('name'), m.group('email')
            name = name.strip().strip('"')
            print(f"{cur_tag:9} {decode_email_name(name)} <{email}>")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment