Skip to content

Instantly share code, notes, and snippets.

@fitzgen
Created February 18, 2011 01:25
Show Gist options
  • Save fitzgen/833090 to your computer and use it in GitHub Desktop.
Save fitzgen/833090 to your computer and use it in GitHub Desktop.
Uses a slightly modified version of the Levenshtien Distance algorithm (also known as "edit distance") to calculate the set of the fewest operations which will change string s in to string t.
# Output of the example which is console.logged at the bottom of operations.js:
$ node operations.js
To change 'kitten' in to 'sitting':
[ [ 'delete', 'k' ]
, [ 'insert', 's' ]
, [ 'retain', 1 ]
, [ 'retain', 1 ]
, [ 'retain', 1 ]
, [ 'delete', 'e' ]
, [ 'insert', 'i' ]
, [ 'retain', 1 ]
, [ 'insert', 'g' ]
]
(function () {
// Simple operation constructors.
function insert (chars) {
return ["insert", chars];
}
function del (chars) {
return ["delete", chars];
}
function retain (n) {
return ["retain", n];
}
// We don't want to copy arrays all the time, aren't mutating lists, and
// only need O(1) prepend and length, we can get away with a custom singly
// linked list implementation.
var theEmptyList = {
length: 0,
toArray: function () {
return [];
}
};
function toArray () {
return [this.car].concat(this.cdr.toArray());
}
function cons (car, cdr) {
return {
car: car,
cdr: cdr,
length: 1 + cdr.length,
toArray: toArray
};
}
// Abstract out the table in case I want to change the implementation to
// arrays of arrays or something.
function put (table, x, y, ops) {
return (table[String(x) + "," + String(y)] = ops);
}
function get (table, x, y) {
var ops = table[String(x) + "," + String(y)];
if ( ops ) {
return ops;
} else {
throw new TypeError("No operations at " + String(x) + "," + String(y));
}
}
function makeOperationsTable (s, t) {
var table = {},
n = s.length,
m = t.length,
i,
j;
put(table, 0, 0, theEmptyList);
for ( i = 1; i <= m; i += 1 ) {
put(table, i, 0, cons(insert(t.charAt(i-1)),
get(table, i-1, 0)));
}
for ( j = 1; j <= n; j += 1 ) {
put(table, 0, j, cons(del(s.charAt(j-1)),
get(table, 0, j-1)));
}
return table;
}
function chooseCell (table, x, y, k) {
var prevOps = get(table, x, y-1),
min = prevOps.length,
direction = "up";
if ( get(table, x-1, y).length < min ) {
prevOps = get(table, x-1, y);
min = prevOps.length;
direction = "left";
}
if ( get(table, x-1, y-1).length < min ) {
prevOps = get(table, x-1, y-1);
min = prevOps.length;
direction = "diagonal";
}
return k(direction, prevOps);
}
function operations (s, t) {
var n = s.length,
m = t.length,
i,
j,
ops = makeOperationsTable(s, t);
for ( i = 1; i <= m; i += 1 ) {
for ( j = 1; j <= n; j += 1 ) {
chooseCell(ops, i, j, function (direction, prevOps) {
switch ( direction ) {
case "left":
put(ops, i, j, cons(insert(t.charAt(i-1)), prevOps));
break;
case "up":
put(ops, i, j, cons(del(s.charAt(j-1)), prevOps));
break;
case "diagonal":
if ( s.charAt(j-1) === t.charAt(i-1) ) {
put(ops, i, j, cons(retain(1), prevOps));
} else {
put(ops, i, j, cons(insert(t.charAt(i-1)),
cons(del(s.charAt(j-1)),
prevOps)));
}
break;
default:
throw new TypeError("Unknown direction.");
}
});
}
}
return get(ops, i-1, j-1).toArray().reverse();
}
exports = typeof exports !== "undefined"
? exports
: this;
exports.operations = operations;
console.log("To change 'kitten' in to 'sitting':");
console.log(operations("kitten", "sitting"));
}());
@leeoniya
Copy link

check this one out:
https://gist.github.com/527658

also:
http://snippets.dzone.com/posts/show/6942

would be pretty cool to add a Damerau–Levenshtein distance variation where swapping characters is also considered a single-cost operation. it takes bit more mem for the optmizied version since you need to store a history of 2 back. i think both are more efficient than this one since it looks like you pre-build the entire table.

some speed optimization for just getting distance is a quick check against source and target string length and returning the full length of the other when either is an empty string.

    var l1 = str1.length, l2 = str2.length;
    if (Math.min(l1, l2) === 0) {
        return Math.max(l1, l2);
    }

http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#JavaScript

@fitzgen
Copy link
Author

fitzgen commented Feb 18, 2011

Thanks for those links!

I'm aware that you only need 2 columns of the matrix at any given time, I just wasn't sure it was worth the hassle. In my defense, I did only just throw this together :)

I'm actually much more interested in what the operations are, rather than what the edit distance is. I think this could be a promising technique for generating the operations for Operational Transformation (which is also why I split substitution in to insert and delete, and have retain all over the place).

http://en.wikipedia.org/wiki/Operational_transformation

@leeoniya
Copy link

no prob :)

this is also a cool demo w/cost adjustments but all done server-side:
http://odur.let.rug.nl/~kleiweg/lev/

@fitzgen
Copy link
Author

fitzgen commented Feb 18, 2011

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment