Created
June 5, 2022 20:18
-
-
Save fschutte/cd30072bab84b7d6c66d8908d03024c5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Exact implementation in javascript of the Damerau-Levenshtein algorithm as | |
* described on https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance | |
* | |
* The only difference is that all arrays in here are 0-based indexed whereas | |
* on wikipedia d has indices starting at −1, while a, b and da are one-indexed. | |
* | |
* @customfunction | |
*/ | |
function distance(a, b) { | |
// da is a dictionary containing all letters as found in both strings | |
const da = {} | |
// d is a two dimensional array with dimensions length+2; the first two rows and cols are initialized and the rest is computed | |
const d = Array(a.length+2).fill().map(()=>Array(b.length+2)) | |
maxdist = a.length + b.length | |
d[0][0] = maxdist // Note that the pseudocode in wikipedia uses d[-1,-1] but I have 0-based indices | |
for (let i=1; i <= a.length+1; i++) { | |
d[i][0] = maxdist | |
d[i][1] = i-1 | |
da[a[i-1]] = 1 | |
} | |
for (let j=1; j <= b.length+1; j++) { | |
d[0][j] = maxdist | |
d[1][j] = j-1 | |
da[b[j-1]] = 1 | |
} | |
for (let i=2; i <= a.length+1; i++) { | |
let db = 1 | |
for (let j=2; j <= b.length+1; j++) { | |
let k = da[b[j-2]] | |
let l = db | |
let cost = 1 | |
if (a[i-2]===b[j-2]) { | |
cost = 0 | |
db = j | |
} | |
d[i][j] = Math.min( | |
d[i-1][j-1] + cost, // substitution | |
d[i][j-1] + 1, // insertion | |
d[i-1][j] + 1, // deletion | |
d[k-1][l-1] + (i-k-1) + 1 + (j-l-1) // transposition | |
) | |
} | |
da[a[i-2]] = i | |
} | |
return d[a.length+1][b.length+1] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment