Created
May 8, 2012 07:30
-
-
Save acreeger/2633296 to your computer and use it in GitHub Desktop.
Simple TrieNode implementation in groovy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class TrieNode { | |
boolean value = false | |
def children = [:] | |
public void build(words) { | |
if (words in String) words = [words] | |
def startTime = System.currentTimeMillis(); | |
for (String word : words) { | |
word = word.toLowerCase() | |
def node = this | |
for (int j = 0; j < word.size();j++) { | |
def c = word.charAt(j) | |
def newNode = node.children[c] | |
if (!newNode) { | |
newNode = new TrieNode() | |
node.children[c] = newNode | |
} | |
node = newNode | |
} | |
node.value = true | |
} | |
println "Added ${words.size()} word(s) in ${System.currentTimeMillis() - startTime}ms." | |
} | |
public boolean find(String word) { | |
def node = this | |
word = word.toLowerCase() | |
for (char c : word.toCharArray()) { | |
if (node.children[c]) { | |
node = node.children[c] | |
} else { | |
return false | |
} | |
} | |
return node.value | |
} | |
public Collection getAll(Stack currentChars = new Stack()) { | |
def results = [] as Set | |
//currentChars.size().times {print '\t'}; println "currentChars: ${currentChars.join("")}" | |
if (value) results << currentChars.join("") | |
children.each { k,v -> | |
currentChars.push(k) | |
results += v.getAll(currentChars) | |
currentChars.pop() | |
} | |
return results | |
} | |
public Collection prefixedWith(String prefix) { | |
if (prefix.size() < 3) { | |
throw new Exception("prefix must be longer than 2 letters") | |
} | |
prefix = prefix.toLowerCase() | |
def node = this | |
for(int i = 0; i < prefix.size() && node != null; i++) { | |
def ch = prefix.charAt(i) | |
node = node.children[ch] | |
} | |
if (!node) { | |
return [] | |
} else { | |
def stack = new Stack() | |
for (def c:prefix.toCharArray()) {stack.push(c)} | |
return node.getAll(stack) | |
} | |
} | |
public Collection findSimilar(charArray, int allowedEdits) { | |
def startTime = System.currentTimeMillis(); | |
def results = _findSimilar(charArray, allowedEdits) | |
println "Found ${results.size()} word(s) in ${System.currentTimeMillis() - startTime}ms." | |
return results | |
} | |
private Set _findSimilar(charArray, int allowedEdits, currentChars = new Stack()) { | |
def results = [] as Set | |
if (charArray in String) charArray = charArray.toLowerCase().toCharArray() as List | |
indentNTimes(currentChars.size(), "In findSimilar with charArray: $charArray and $allowedEdits allowedEdits. currentChars: ${currentChars.join('')}") | |
if (!charArray) { | |
if (this.value) { | |
def result = currentChars.join("") | |
indentNTimes(currentChars.size(), "Adding value to results: ${result}") | |
results << result | |
} | |
} | |
if (allowedEdits > 0){ | |
//DELETIONS: Remove letter, try it on the current branch | |
def remainder = charArray ? charArray.tail() : [] | |
def head = charArray ? charArray.head() : null | |
indentNTimes(currentChars.size(), "Looking for deletions, calling findSimilar on this instance with charArray: ${remainder}") | |
results += _findSimilar(remainder, allowedEdits - 1, currentChars) | |
children.each { k,v -> | |
//INSERTIONS: Pass word, with no changes to each of the children | |
currentChars.push(k) | |
indentNTimes(currentChars.size() - 1, "Looking for insertions, calling findSimilar on TrieNode $k with charArray: ${charArray}") | |
results += v._findSimilar(charArray, allowedEdits - 1, currentChars) | |
//SUBSITUTIONS: Pass word without first letter to each of the children | |
if (k != head) { | |
indentNTimes(currentChars.size() - 1, "Looking for substitutions, calling findSimilar on TrieNode $k with charArray: ${remainder}") | |
results += v._findSimilar(remainder, allowedEdits - 1, currentChars) | |
} | |
currentChars.pop() | |
} | |
//TRANSPOSITIONs: Pass the word with first two swapped down the current path. | |
if (charArray.size() > 2) { | |
List transposedCharArray = [charArray[1], charArray[0]] | |
if (charArray.size() > 2) transposedCharArray += charArray[2..-1] | |
indentNTimes(currentChars.size(), "Looking for transpositions, calling findSimilar on this instance with charArray: ${transposedCharArray}") | |
results += _findSimilar(transposedCharArray, allowedEdits - 1, currentChars) | |
} | |
} | |
if (charArray) { | |
def ch = charArray.head() | |
def matchingChild = children[ch] | |
indentNTimes(currentChars.size(), "Now looking for paths with no edits") | |
if (matchingChild) { | |
indentNTimes(currentChars.size(), "Found child $ch, heading down the tree") | |
currentChars.push(ch) | |
results += matchingChild._findSimilar(charArray.tail(), allowedEdits, currentChars) | |
currentChars.pop() | |
} | |
} | |
return results | |
} | |
private indentNTimes(i, str) { | |
// i.times {print "\t"} | |
// println str | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment