Last active
September 27, 2024 03:56
-
-
Save trvswgnr/4046a72d31a611d270e92857770dfcf8 to your computer and use it in GitHub Desktop.
levenshtein distance ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
export function levenshteinDistance(a: string, b: string): number { | |
if (typeof a !== "string" || typeof b !== "string") { | |
throw new TypeError("Arguments must be of type `string`"); | |
} | |
if (arguments.length > 2) { | |
throw new RangeError("Too many arguments"); | |
} | |
if (a === b) { | |
return 0; | |
} | |
// make sure `a` is the shorter string to optimize space | |
if (a.length > b.length) { | |
[a, b] = [b, a]; | |
} | |
let aLength = a.length; | |
let bLength = b.length; | |
// remove common suffix to reduce computation | |
while (aLength > 0 && a[aLength - 1] === b[bLength - 1]) { | |
aLength--; | |
bLength--; | |
} | |
// remove common prefix and adjust offsets | |
let commonPrefix = 0; | |
while (commonPrefix < aLength && a[commonPrefix] === b[commonPrefix]) { | |
commonPrefix++; | |
} | |
aLength -= commonPrefix; | |
bLength -= commonPrefix; | |
// if no characters are left after trimming, return remaining length | |
if (aLength === 0 || bLength < 3) { | |
return bLength; | |
} | |
// array of distances for dp | |
const distances: number[] = []; | |
// initialize the distance array with the cost of deletions and character codes of `a` | |
for (let i = 0; i < aLength; i++) { | |
distances.push(i + 1); // cost of deletions | |
distances.push(a.charCodeAt(commonPrefix + i)); // character code of `a` | |
} | |
const distancesLength = distances.length - 1; | |
let distance = 0; | |
let aIndex = 0; | |
let bIndex = 0; | |
// variables to hold distances for dynamic programming | |
let prevDistance: number; | |
let currentDistance0: number; | |
let currentDistance1: number; | |
let currentDistance2: number; | |
let currentDistance3: number; | |
// variables to hold character codes of `b` | |
let charCodeB0: number; | |
let charCodeB1: number; | |
let charCodeB2: number; | |
let charCodeB3: number; | |
// main loop: process four characters of `b` at a time | |
while (bIndex < bLength - 3) { | |
charCodeB0 = b.charCodeAt(commonPrefix + (currentDistance0 = bIndex)); | |
charCodeB1 = b.charCodeAt(commonPrefix + (currentDistance1 = bIndex + 1)); | |
charCodeB2 = b.charCodeAt(commonPrefix + (currentDistance2 = bIndex + 2)); | |
charCodeB3 = b.charCodeAt(commonPrefix + (currentDistance3 = bIndex + 3)); | |
distance = bIndex += 4; | |
let charCodeA: number; | |
for (aIndex = 0; aIndex < distancesLength; aIndex += 2) { | |
prevDistance = distances[aIndex]; // previous distance value | |
charCodeA = distances[aIndex + 1]; // character code of `a` | |
// compute distances using the custom `minDistance` function | |
currentDistance0 = minDistance( | |
prevDistance, | |
currentDistance0, | |
currentDistance1, | |
charCodeA, | |
charCodeB0, | |
); | |
currentDistance1 = minDistance( | |
currentDistance0, | |
currentDistance1, | |
currentDistance2, | |
charCodeA, | |
charCodeB1, | |
); | |
currentDistance2 = minDistance( | |
currentDistance1, | |
currentDistance2, | |
currentDistance3, | |
charCodeA, | |
charCodeB2, | |
); | |
distance = minDistance( | |
currentDistance2, | |
currentDistance3, | |
distance, | |
charCodeA, | |
charCodeB3, | |
); | |
distances[aIndex] = distance; // update the distance array | |
// prepare distances for the next iteration | |
currentDistance3 = currentDistance2; | |
currentDistance2 = currentDistance1; | |
currentDistance1 = currentDistance0; | |
currentDistance0 = prevDistance; | |
} | |
} | |
// handle remaining characters in `b` | |
while (bIndex < bLength) { | |
currentDistance0 = bIndex++; | |
charCodeB0 = b.charCodeAt(commonPrefix + currentDistance0); | |
distance = bIndex; | |
for (aIndex = 0; aIndex < distancesLength; aIndex += 2) { | |
prevDistance = distances[aIndex]; | |
distances[aIndex] = distance = minDistance( | |
prevDistance, | |
currentDistance0, | |
distance, | |
charCodeB0, | |
distances[aIndex + 1], | |
); | |
currentDistance0 = prevDistance; | |
} | |
} | |
return distance; | |
} | |
function minDistance( | |
diag: number, | |
top: number, | |
left: number, | |
charCodeA: number, | |
charCodeB: number, | |
): number { | |
if (diag < top || left < top) { | |
// insertion or deletion | |
return diag > left ? left + 1 : diag + 1; | |
} else { | |
// substitution or match | |
return charCodeA === charCodeB ? top : top + 1; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment