Skip to content

Instantly share code, notes, and snippets.

@3d0c
Created July 11, 2015 18:06
Show Gist options
  • Save 3d0c/f428f8c713996ece74a8 to your computer and use it in GitHub Desktop.
Save 3d0c/f428f8c713996ece74a8 to your computer and use it in GitHub Desktop.
LCW Finder
// Longest compound word finder
//
// Usage: feed stid to the program
// E.g.:
// cat /tmp/123.txt | go run lcw.go
// curl http://norvig.com/ngrams/word.list | go run lcw.go
//
package main
import (
"bufio"
"container/list"
"fmt"
"os"
)
type Pair struct {
Full string
Part string
}
type Node struct {
children []*Node
letter rune
term bool
}
func (this *Node) findChildren(r rune) *Node {
for i, _ := range this.children {
if this.children[i].letter == r {
return this.children[i]
}
}
return nil
}
func (this *Node) setTerm() {
this.term = true
}
func (this *Node) hasTerm() bool {
return this.term
}
type Trie struct {
root *Node
}
func NewTrie() *Trie {
return &Trie{root: &Node{}}
}
func (this *Trie) AddWord(s string) {
var current *Node = this.root
if len(s) == 0 {
current.setTerm()
return
}
for idx, r := range s {
child := current.findChildren(r)
if child != nil {
current = child
} else {
child := &Node{letter: r}
current.children = append(current.children, child)
current = child
}
if idx == len(s)-1 {
current.setTerm()
}
}
}
type LCW struct {
stack list.List
pairs list.List
}
func (this *LCW) init(node *Node, s string) {
validWord := false
newWord := s + string(node.letter)
if node.hasTerm() {
validWord = true
if this.stack.Len() > 0 {
for e := this.stack.Front(); e != nil; e = e.Next() {
this.pairs.PushBack(Pair{newWord, e.Value.(string)})
}
}
this.stack.PushFront(newWord)
}
for i := 0; i < len(node.children); i++ {
tmp := node.children[i]
this.init(tmp, newWord)
}
if validWord {
this.stack.Remove(this.stack.Front())
}
}
func (this *LCW) checkPart(node *Node, pair Pair) {
f, p := pair.Full, pair.Part
rem := f[len(p):len(f)]
word := p
current := node
for _, r := range rem {
tmp := current.findChildren(r)
if tmp == nil {
return
}
word += string(r)
if tmp.hasTerm() {
this.pairs.PushBack(Pair{f, word})
}
current = tmp
}
}
func (this *LCW) Find(t *Trie) string {
result := ""
this.init(t.root, "")
for {
if this.pairs.Len() == 0 {
break
}
e := this.pairs.Front()
this.pairs.Remove(e)
if e.Value.(Pair).Part == e.Value.(Pair).Full {
if len(e.Value.(Pair).Full) > len(result) {
result = e.Value.(Pair).Full
}
} else {
this.checkPart(t.root, e.Value.(Pair))
}
}
return result
}
func main() {
t := NewTrie()
scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
t.AddWord(scanner.Text())
}
lcw := &LCW{}
fmt.Println(lcw.Find(t))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment