Skip to content

Instantly share code, notes, and snippets.

@calpaterson
Created January 5, 2021 10:16
Show Gist options
  • Save calpaterson/6b304642aeecf789248be2055d699b7c to your computer and use it in GitHub Desktop.
Save calpaterson/6b304642aeecf789248be2055d699b7c to your computer and use it in GitHub Desktop.
parse out names from emails
#!/usr/bin/env python3
# this is designed to be used as part of a command line like:
# find Maildir/cur/ -type f -newermt '02/23/2019 0:00:00' | xargs ./emailfromparser.py | sort | uniq > new_emails.csv
from sys import argv, stdout, stderr
from csv import DictWriter
from email.parser import BytesHeaderParser
from email.utils import parseaddr
def _split_name(name):
    """Split a display name into a ``(fname, sname)`` pair.

    Two layouts are recognised:

    * ``"Surname, Forename"`` (what the original comment calls a "finance
      name"): split on the first comma and swap the halves.
    * ``"Forename Surname"``: exactly two whitespace-separated words.

    Anything else (a single word, three or more words, an empty string) is
    returned whole as the forename with an empty surname.
    """
    if "," in name:
        sname, fname = name.split(",", maxsplit=1)
        # The comma is normally followed by a space; strip it so the
        # forename does not carry leading whitespace into the CSV.
        return fname.strip(), sname.strip()
    # split() without an argument collapses runs of whitespace, so a doubled
    # space between the two words still yields exactly two parts.
    parts = name.split()
    if len(parts) == 2:
        return parts[0], parts[1]
    return name, ""


def main(filepaths):
    """Write one CSV row per message file describing its sender.

    For every path in *filepaths* the message headers are parsed, the
    ``From`` header is split into a display name and an address, and a row
    with the columns ``fname, sname, email, org`` is written to stdout
    (no header row is emitted). ``org`` is the part of the address after
    the first ``@``.

    Processing is best-effort: any failure for a given file (unreadable
    file, missing ``From`` header, unexpected header contents) is reported
    on stderr and the remaining files are still processed.
    """
    parser = BytesHeaderParser()
    csvwriter = DictWriter(stdout, ["fname", "sname", "email", "org"])
    for filepath in filepaths:
        try:
            with open(filepath, "rb") as fp:
                msg = parser.parse(fp)
            from_field = msg.get("From")
            if from_field is None:
                # No sender to report; handle it like any other unparseable
                # message instead of handing None to parseaddr.
                raise ValueError("message has no From header")
            name, email = parseaddr(from_field)
            fname, sname = _split_name(name)
            csvwriter.writerow(
                {
                    "fname": fname,
                    "sname": sname,
                    "email": email,
                    "org": email.split("@", maxsplit=1)[-1],
                }
            )
        except Exception:
            # Deliberately broad: one bad file must not abort the whole run.
            print(f"unable to parse {filepath}", file=stderr)
if __name__ == "__main__":
    # Script entry point: every command-line argument after the program
    # name is passed to main() as a file path.
    filepaths = argv[1:]
    main(filepaths)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment