Created
July 19, 2022 14:56
-
-
Save dmeehan1968/85dd600b1b88a8c189346f690c53dc55 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * American Soundex algorithm (see https://en.wikipedia.org/wiki/Soundex).
 *
 * Encodes a name as its first letter followed by three digits, so that
 * similar-sounding names share a code.
 *
 * Processing steps:
 *  1. Upper-case the input and keep only the letters A-Z.
 *  2. Map every letter to its Soundex digit. Vowels become '0' and
 *     H/W/Y become the placeholder '.'.
 *  3. Drop H/W/Y placeholders (except in first position), so equal digits
 *     separated only by H/W/Y become adjacent.
 *  4. Collapse runs of adjacent equal digits. Vowels are still present at
 *     this point, so equal digits separated by a vowel are NOT collapsed.
 *  5. Drop vowels (except in first position).
 *  6. Discard the first code (the initial letter is emitted verbatim), keep
 *     up to three digits and right-pad with '0'.
 *
 * @param name - The name to encode. Characters outside A-Z (after
 *   upper-casing) are ignored.
 * @returns The four-character Soundex code, or an empty string when `name`
 *   contains no A-Z letters.
 *
 * @example
 * americanSoundex('Robert') === 'R163'
 * americanSoundex('Rupert') === 'R163'
 * americanSoundex('Rubin') === 'R150'
 * americanSoundex('Ashcraft') === 'A261'
 * americanSoundex('Ashcroft') === 'A261'
 * americanSoundex('Tymczak') === 'T522'
 * americanSoundex('Pfister') === 'P236'
 * americanSoundex('Presley') === 'P624'
 * americanSoundex('Presleye') === 'P624'
 * americanSoundex('the') === 'T000'
 * americanSoundex('thee') === 'T000'
 * americanSoundex('Honeyman') === 'H555'
 * americanSoundex('') === ''
 */
function americanSoundex(name: string): string {
  const letterToSoundexCodeMap: Record<string, string> = {
    AEIOU: '0',
    BFPV: '1',
    CGJKQSXZ: '2',
    DT: '3',
    L: '4',
    MN: '5',
    R: '6',
    HWY: '.',
  };

  // Expand the grouped table into a per-letter lookup once per call instead
  // of rebuilding and scanning the grouped table for every single letter.
  const codeByLetter = new Map<string, string>();
  for (const [group, code] of Object.entries(letterToSoundexCodeMap)) {
    for (const letter of group) {
      codeByLetter.set(letter, code);
    }
  }

  const letters = name.toUpperCase().split('').filter(letter => /[A-Z]/.test(letter));

  // Without any letter there is no initial to emit; previously this produced
  // the bogus code "undefined000".
  const initial = letters[0];
  if (initial === undefined) {
    return '';
  }

  const codeAsDigits = letters.map(letter => codeByLetter.get(letter) ?? letter);

  // Index 0 is always retained by every filter so that the first letter's
  // code still takes part in duplicate removal before being sliced away.
  const removingPlaceholdersExceptFirst = (digit: string, index: number) => index === 0 || digit !== '.';
  const removingAdjacentDuplicates = (digit: string, index: number, array: string[]) => index === 0 || digit !== array[index - 1];
  const removingZerosExceptFirst = (digit: string, index: number) => index === 0 || digit !== '0';

  const digits = codeAsDigits
    .filter(removingPlaceholdersExceptFirst)
    .filter(removingAdjacentDuplicates)
    .filter(removingZerosExceptFirst)
    .join('')
    .slice(1, 4) // skip the initial letter's own code, keep at most 3 digits
    .padEnd(3, '0');

  return initial + digits;
}
// Self-check: encode each sample name and print one line per sample with a
// pass/fail mark; on a mismatch the expected code is appended.
for (const { name, code } of [
  { name: 'Robert', code: 'R163' },
  { name: 'Rupert', code: 'R163' },
  { name: 'Rubin', code: 'R150' },
  { name: 'Ashcraft', code: 'A261' },
  { name: 'Ashcroft', code: 'A261' },
  { name: 'Tymczak', code: 'T522' },
  { name: 'Pfister', code: 'P236' },
  { name: 'Presley', code: 'P624' },
  { name: 'Presleye', code: 'P624' },
  { name: 'the', code: 'T000' },
  { name: 'thee', code: 'T000' },
  { name: 'Honeyman', code: 'H555' },
]) {
  const actual = americanSoundex(name);
  const matches = actual === code;
  const mark = matches ? '✅' : '❌';
  const detail = matches ? '' : `Expected: ${code}`;
  console.log(`${name} => ${actual} ${mark} ${detail}`);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment