Skip to content

Instantly share code, notes, and snippets.

@s-celles
Last active August 14, 2017 14:53
Show Gist options
  • Save s-celles/22c6d5886941c183a0ea36812ea1db50 to your computer and use it in GitHub Desktop.
Save s-celles/22c6d5886941c183a0ea36812ea1db50 to your computer and use it in GitHub Desktop.
Find duplicate emails
import click
import re
from collections import OrderedDict
def format_email(email, name):
    """Format an address for output, with its display name when there is one.

    Args:
        email: The bare address, without angle brackets.
        name: The display name, or None / "" when the entry had none.

    Returns:
        "<email>" when no usable name is given, otherwise "name <email>".
    """
    # An empty name (e.g. parsed from `"" <a@b.c>`) is treated like a missing
    # one, so the result never starts with a stray space.
    if not name:
        return "<%s>" % email
    return "%s <%s>" % (name, email)
@click.command()
@click.argument('emails')
@click.option('--sep_in', help='Email separator (in)', default=", ")
@click.option('--sep_out', help='Email separator (out)', default=", ")
def main(emails, sep_in, sep_out):
    """Report duplicate addresses in EMAILS and print the unique ones.

    EMAILS is a single string of entries separated by --sep_in. Each entry
    is either a bare address, an address in angle brackets, or a display
    name (optionally double-quoted) followed by an address in angle
    brackets. The first occurrence of every address is kept, in input
    order, and the resulting list is printed joined with --sep_out.
    """
    # str.split raises ValueError on an empty separator; report it as a
    # usage error instead of a traceback.
    if not sep_in:
        raise click.BadParameter("separator must not be empty",
                                 param_hint='--sep_in')

    # Raw string so that \s reaches the regex engine untouched.
    # Group 1: optional display name (quotes stripped), followed by one
    # whitespace character. Group 2: the address, angle brackets stripped.
    pattern = re.compile(r'(?:"?([^"]*)"?\s)?(?:<?(.+@[^>]+)>?)')

    # Blank entries (trailing separator, empty argument) carry no address;
    # drop them rather than failing on them.
    lst_emails = [s.strip() for s in emails.split(sep_in)]
    lst_emails = [s for s in lst_emails if s]

    d_emails = OrderedDict()  # address -> display name of first occurrence
    lst_duplicate_emails = []  # raw entries whose address was already seen
    for s_name_email in lst_emails:
        match = pattern.search(s_name_email)
        if match is None:
            # No '@' in the entry: nothing that looks like an address.
            raise click.BadParameter(
                "cannot find an email address in %r" % s_name_email,
                param_hint='emails')
        s_name, s_email = match.groups()
        if s_email in d_emails:
            lst_duplicate_emails.append(s_name_email)
        else:
            d_emails[s_email] = s_name

    n_emails = len(lst_emails)
    n_emails_uniq = len(d_emails)
    n_duplicates = n_emails - n_emails_uniq

    lst_unique = [format_email(s_email, s_name)
                  for s_email, s_name in d_emails.items()]

    print("")
    print("Emails: %d" % n_emails)
    print("Duplicate(s): %d" % n_duplicates)
    print(lst_duplicate_emails)
    print("Emails (unique): %d" % n_emails_uniq)
    print("")
    print(sep_out.join(lst_unique))
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment