Last active
July 3, 2018 02:49
-
-
Save DrustZ/dfd0b4a189913ac2c6464fead64af962 to your computer and use it in GitHub Desktop.
Wagner-Fischer Levenshtein distance, now with a means to generate all possible optimal alignments. (js version)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//A library adopted from Python version Wagner & Fischer algorithm: | |
//url: https://gist.github.com/kylebgorman/8034009 | |
//for more information, please go to the original library | |
function INSERTION(A, cost=1){ | |
return cost; | |
} | |
function DELETION(A, cost=1){ | |
return cost; | |
} | |
function SUBSTITUTION(A, B, cost=1) { | |
return cost; | |
} | |
function makeTrace(costv, opsv) { | |
return {cost: costv, | |
ops: opsv} | |
} | |
class WagnerFischer { | |
constructor(A, B, insertion=INSERTION, deletion=DELETION, substitution=SUBSTITUTION){ | |
// Stores cost functions in a dictionary for programmatic access. | |
this.costs = {"I": insertion, "D": deletion, "S": substitution} | |
this.asz = A.length | |
this.bsz = B.length | |
this.create2DArray() | |
// Fills in edges. | |
this.table[0][0] = makeTrace(0, ["O"]) // Start cell. | |
for (let i = 1; i < this.asz+1; ++i){ | |
this.table[i][0] = makeTrace(this.table[i - 1][0].cost + this.costs["D"](A[i - 1]), | |
["D"]) | |
} | |
for (let j = 1; j < this.bsz+1; ++j){ | |
this.table[0][j] = makeTrace(this.table[0][j - 1].cost + this.costs["I"](B[j - 1]), | |
["I"]) | |
} | |
// Fills in rest. | |
for (let i = 0; i < this.asz; ++i){ | |
for (let j = 0; j < this.bsz; ++j){ | |
// Cleans it up in case there are more than one check for match | |
// first, as it is always the cheapest option. | |
if (A[i] == B[j]){ | |
this.table[i + 1][j + 1] = makeTrace(this.table[i][j].cost, ["M"]) | |
} | |
// Checks for other types. | |
else{ | |
let costD = this.table[i][j + 1].cost + this.costs["D"](A[i]) | |
let costI = this.table[i + 1][j].cost + this.costs["I"](B[j]) | |
let costS = this.table[i][j].cost + this.costs["S"](A[i], B[j]) | |
let min_val = Math.min(costI, costD, costS) | |
let trace = makeTrace(min_val, []) | |
// Adds _all_ operations matching minimum value. | |
if (costD == min_val) | |
trace.ops.push("D") | |
if (costI == min_val) | |
trace.ops.push("I") | |
if (costS == min_val) | |
trace.ops.push("S") | |
this.table[i + 1][j + 1] = trace | |
} | |
} | |
} | |
// Stores optimum cost as a property. | |
this.cost = this.table[this.asz][this.bsz].cost | |
//cell operation iterator | |
this.stepback = function(i, j, trace, path_back){ | |
/* | |
Given a cell location (i, j) and a Trace object trace, generate | |
all traces they point back to in the table | |
*/ | |
let res = [] | |
for (let op of trace.ops){ | |
if (op === "M") | |
res.push([i - 1, j - 1, this.table[i - 1][j - 1], path_back.concat(["M"])]) | |
else if (op === "I") | |
res.push([i, j - 1, this.table[i][j - 1], path_back.concat(["I"])]) | |
else if (op === "D") | |
res.push([i - 1, j, this.table[i - 1][j], path_back.concat(["D"])]) | |
else if (op === "S") | |
res.push([i - 1, j - 1, this.table[i - 1][j - 1], path_back.concat(["S"])]) | |
else if (op === "O") // finished! | |
res.push([]) | |
} | |
return res | |
} | |
//alignment iterator | |
this.alignments = function*(){ | |
/* | |
Generate all alignments with optimal-cost via breadth-first | |
traversal of the graph of all optimal-cost (reverse) paths | |
implicit in the dynamic programming table | |
Each cell of the queue is a tuple of (i, j, trace, path_back) | |
where i, j is the current index, trace is the trace object at | |
this cell, and path_back is a reversed list of edit operations | |
which is initialized as an empty list. | |
*/ | |
let queue = this.stepback(this.asz, this.bsz, this.table[this.asz][this.bsz], []) | |
while (queue.length > 0){ | |
let step = queue.shift(); | |
if (step.length < 1) | |
continue | |
let i = step[0]; | |
let j = step[1]; | |
let trace = step[2]; | |
let path_back = step[3]; | |
if (trace.ops[0] === "O"){ | |
// We have reached the origin, the end of a reverse path, so | |
// yield the list of edit operations in reverse. | |
yield path_back.reverse() | |
continue | |
} | |
queue.push(...this.stepback(i, j, trace, path_back)) | |
} | |
} | |
} | |
IDS() { | |
/* | |
Estimates insertions, deletions, and substitution _count_ (not | |
costs). Non-integer values arise when there are multiple possible | |
alignments with the same cost. | |
*/ | |
let npaths = 0 | |
let opcounts = {M:0,I:0,D:0,S:0} | |
let alignments = this.alignments() | |
for (let alignment of alignments){ | |
// Counts edit types for this path, ignoring "M" (which is free). | |
opcounts.M += alignment.filter(function(val) { return val === "M";}).length; | |
opcounts.I += alignment.filter(function(val) { return val === "I";}).length; | |
opcounts.D += alignment.filter(function(val) { return val === "D";}).length; | |
opcounts.S += alignment.filter(function(val) { return val === "S";}).length; | |
npaths += 1 | |
} | |
for (let key in opcounts){ | |
opcounts[key] /= npaths | |
} | |
// Averages over all paths. | |
return opcounts; | |
} | |
create2DArray() { | |
this.table = [] | |
for (let i = 0; i < this.asz+1; ++i){ | |
this.table.push(new Array(this.bsz+1)) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment