Skip to content

Instantly share code, notes, and snippets.

@dillmo
Created May 17, 2019 21:08
Show Gist options
  • Save dillmo/cad8ce9517cc7774fb8c01efa058b6e2 to your computer and use it in GitHub Desktop.
Save dillmo/cad8ce9517cc7774fb8c01efa058b6e2 to your computer and use it in GitHub Desktop.
def soundex(name):
    """Return the American Soundex code of *name* formatted as ``'X-ddd'``.

    The result is always five characters long: the first character of
    *name* exactly as given, a hyphen, and three digits.

    Encoding rules:
      * b f p v -> 1, c g j k q s x z -> 2, d t -> 3, l -> 4, m n -> 5,
        r -> 6; every other letter has no digit.
      * Consecutive letters sharing a digit are encoded once.  This also
        applies to the letter right after the first letter ('Pfister').
      * 'h', 'w' and non-alphabetic characters are transparent: equal
        digits on either side of them still count as consecutive
        ('Ashcraft').
      * Vowels (and 'y') separate: equal digits on either side of them
        are both encoded ('Tymczak').
      * The digit string is truncated or zero-padded to three digits.

    Raises:
        IndexError: if *name* is empty (there is no first character).

    >>> soundex('Ashcraft')
    'A-261'
    >>> soundex('Lee')
    'L-000'
    """
    code = {
        'b': '1', 'f': '1', 'p': '1', 'v': '1',
        'c': '2', 'g': '2', 'j': '2', 'k': '2',
        'q': '2', 's': '2', 'x': '2', 'z': '2',
        'd': '3', 't': '3',
        'l': '4',
        'm': '5', 'n': '5',
        'r': '6',
    }

    first = name[0]
    # Seed with the first letter's digit so that an immediately following
    # letter of the same group is not encoded a second time.
    previous = code.get(first.lower())

    digits = []
    for char in name[1:].lower():
        # 'h', 'w' and punctuation/whitespace do not break a run of equal
        # digits, so skip them without touching ``previous``.
        if char in ('h', 'w') or not char.isalpha():
            continue
        digit = code.get(char)
        if digit is not None and digit != previous:
            digits.append(digit)
            if len(digits) == 3:
                break
        # Vowels (digit is None) reset the run, allowing the same digit
        # to be encoded again afterwards.
        previous = digit

    return first + '-' + ''.join(digits).ljust(3, '0')
if __name__ == '__main__':
    # Demo run: print the Soundex code of a handful of sample surnames,
    # one per line, in the order listed.
    sample_names = (
        'Gutierrez',
        'Pfister',
        'Jackson',
        'Tymczak',
        'VanDeusen',
        'Ashcraft',
    )
    for surname in sample_names:
        print(soundex(surname))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment