Skip to content

Instantly share code, notes, and snippets.

@vmarkovtsev
Last active February 18, 2021 16:34
Show Gist options
  • Save vmarkovtsev/f690231f3e6ef147b831c567b0c1c61e to your computer and use it in GitHub Desktop.
Save vmarkovtsev/f690231f3e6ef147b831c567b0c1c61e to your computer and use it in GitHub Desktop.
import re
from metaphone import doublemetaphone
from unidecode import unidecode
# Matches a run of one or more characters that are neither word characters
# (`\w`: letters, digits, underscore) nor the space character. Tabs and
# newlines are therefore matched by this pattern as well.
nonalphanumeric_re = re.compile(r"[^\w ]+")
# Matches a run of one or more space characters (only U+0020, not tabs or
# other whitespace).
whitespace_re = re.compile(r" +")
def normalize_simple(name: str) -> str:
    """Return a lowercased copy of *name* with punctuation turned into spaces.

    The steps are applied in order:

    1. lowercase the input;
    2. replace every run matched by ``nonalphanumeric_re`` with one space;
    3. collapse every run matched by ``whitespace_re`` into one space.

    Leading and trailing spaces are not stripped, so the result may start
    or end with a single space.
    """
    lowered = name.lower()
    without_punctuation = nonalphanumeric_re.sub(" ", lowered)
    collapsed = whitespace_re.sub(" ", without_punctuation)
    return collapsed
def normalize_unidecode(name: str) -> str:
    """Pass *name* through ``unidecode`` and then ``normalize_simple``.

    ``unidecode`` runs first, so the lowercasing and punctuation handling
    of ``normalize_simple`` operate on its output rather than on the raw
    input string.
    """
    transliterated = unidecode(name)
    return normalize_simple(transliterated)
def normalize_unidecode_metaphone(name: str) -> str:
    """Return the space-joined ``doublemetaphone`` codes of the parts of *name*.

    The name is first normalized with ``normalize_unidecode`` and split on
    single spaces. Each part is passed to ``doublemetaphone`` and the first
    element of its result is kept; the kept codes are joined back together
    with single spaces.
    """
    # NOTE(review): splitting on " " keeps empty parts when the normalized
    # name starts or ends with a space; kept as-is to preserve the output.
    return " ".join(
        doublemetaphone(part)[0]
        for part in normalize_unidecode(name).split(" ")
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment