Last active
November 8, 2015 04:04
-
-
Save juliahw/1fe9ba1981d58087cef8 to your computer and use it in GitHub Desktop.
Code for a Levenshtein trie, condensed into one file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class TrieNode { | |
// Constructs an empty trie node. | |
constructor() { | |
// The word completed by this node. | |
this.word = null; | |
// A child consists of a letter key and TrieNode value. | |
this.children = {}; | |
} | |
// Inserts a word into the trie, with this node as | |
// the root of the trie. | |
insert(word) { | |
let node = this; | |
for (let i = 0; i < word.length; i++) { | |
const letter = word[i]; | |
node.children[letter] = node.children[letter] || new TrieNode(); | |
node = node.children[letter]; | |
} | |
// Mark the end of the word. | |
node.word = word; | |
} | |
} | |
class LevenshteinTrie { | |
// Constructs a trie from the given dictionary. | |
constructor(dictionary, opts={}) { | |
this.root = new TrieNode(); | |
dictionary.forEach((word) => { | |
this.root.insert(word); | |
}); | |
this.opts = Object.assign(opts, LevenshteinTrie.defaultOptions); | |
} | |
// Returns an array of closest matches in the dictionary. | |
search(word) { | |
// Instantiate a 1 x n matrix. | |
let row = []; | |
for (let i = 0; i <= word.length; i++) { | |
row[i] = i; | |
} | |
let results = []; | |
for (const letter in this.root.children) { | |
this.searchRecursive(this.root.children[letter], letter, word, row, results); | |
} | |
if (this.opts.sort) { | |
results = results.sort((a, b) => a.distance - b.distance); | |
} | |
return { | |
words: results.map((a) => a.word), | |
min: minWithIndex(results, (a) => a.distance) | |
}; | |
} | |
// Traverses the trie recursively. | |
searchRecursive(node, letter, word, previousRow, results) { | |
if (!node) return; | |
// The first index in the row must always be one more than the | |
// row above it in the same column. This is because the first | |
// column represents comparison with the empty string. | |
let currentRow = [ previousRow[0] + 1 ]; | |
// Populate the rest of the row using the previously computed | |
// row. | |
for (let i = 1; i <= word.length; i++) { | |
if (letter === word[i - 1]) { | |
currentRow[i] = previousRow[i - 1]; | |
} else { | |
currentRow[i] = Math.min(previousRow[i - 1], previousRow[i], | |
currentRow[i - 1]) + 1; | |
} | |
} | |
// The last entry in the row is the edit distance. | |
const distance = currentRow[currentRow.length - 1]; | |
// If the node is a full word, we add the word to results... | |
if (node.word && distance < this.opts.maxDistance) { | |
// if the results array has room... | |
if (results.length < this.opts.maxResults) { | |
results.push({ word: node.word, distance: distance }); | |
} | |
// or if it is a closer match than the farthest result in the array. | |
else { | |
const max = maxWithIndex(results, a => a.distance); | |
if (distance < max.value) { | |
results[max.index] = { word: node.word, distance: distance }; | |
} | |
} | |
} | |
// If any entries in the row are less than maxDistance, | |
// we continue searching. | |
if (Math.min.apply(Math, currentRow) < this.opts.maxDistance) { | |
for (const letter in node.children) { | |
this.searchRecursive(node.children[letter], letter, word, currentRow, results); | |
} | |
} | |
} | |
} | |
LevenshteinTrie.defaultOptions = { | |
// A call to search() will return no more than maxResults results. | |
maxResults: 5, | |
// A call to search() will not return words farther than | |
// maxDistance apart. | |
maxDistance: Infinity, | |
// True if results should be returned sorted from closest | |
// to farthest match. | |
sortResults: false | |
} | |
// Returns the index and value of the minimum of arr. | |
function minWithIndex(arr, f) { | |
if (!arr.length) return null; | |
let min = f(arr[0]), minIndex = 0; | |
arr.forEach((item, index) => { | |
const value = f(item); | |
if (value < min) { | |
min = value; | |
minIndex = index; | |
} | |
}); | |
return { value: min, index: minIndex }; | |
} | |
// Returns the index and value of the maximum of arr. | |
function maxWithIndex(arr, f) { | |
if (!arr.length) return null; | |
let max = f(arr[0]), maxIndex = 0; | |
arr.forEach((item, index) => { | |
const value = f(item); | |
if (value > max) { | |
max = value; | |
maxIndex = index; | |
} | |
}); | |
return { value: max, index: maxIndex }; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment