Skip to content

Instantly share code, notes, and snippets.

@quux00
Created October 16, 2014 01:18
Show Gist options
  • Save quux00/cb010e2661086a3aa5c0 to your computer and use it in GitHub Desktop.
Save quux00/cb010e2661086a3aa5c0 to your computer and use it in GitHub Desktop.
A spelling checker / Levenshtein distance finder in Go
package main
import (
"bufio"
"fmt"
"log"
"os"
"strconv"
"strings"
"time"
)
func loadWordList() []string {
f, err := os.Open("assets_scrabble_words3.txt")
if err != nil {
log.Fatalf("WARN: %v\n", err)
}
defer f.Close()
wordlist := make([]string, 0, 40000)
scnr := bufio.NewScanner(f)
for scnr.Scan() {
ln := strings.TrimSpace(scnr.Text())
if len(ln) != 0 {
wordlist = append(wordlist, ln)
}
}
if err = scnr.Err(); err != nil {
log.Fatalf("Error reading in wordlist: %v\n", err)
}
return wordlist
}
// implementation of branchFreeMin
func min(x, y int) int {
xy := x - y
r := y + (xy & (xy >> 63))
return r
}
func abs(x int) int {
switch {
case x < 0:
return -x
case x == 0:
return 0 // return correctly abs(-0)
}
return x
}
func orderByLengthRunes(word1, word2 []rune) ([]rune, []rune) {
if len(word1) >= len(word2) {
return word1, word2
}
return word2, word1
}
// assumption: len(word1) >= len(word2)
func goToMismatch(word1, word2 []rune) ([]rune, []rune) {
for len(word2) > 0 {
if word1[0] != word2[0] {
return word1, word2
}
word1 = word1[1:]
word2 = word2[1:]
}
return word1, word2
}
//
// Returns position of first match of matchLetter in run within 0..forwardPositions
// or 0..len(word), whichever is smaller. Returns -1 if no match found within
// that range
//
func indexMatch(matchLetter rune, word []rune, forwardPositions int) int {
forwardPos := min(len(word), forwardPositions)
for i := 0; i < forwardPos; i++ {
if word[i] == matchLetter {
return i
}
}
return -1
}
func fuzzyMatch(word1, word2 []rune, remainingMismatches int) bool {
if abs(len(word1)-len(word2)) > remainingMismatches {
return false
}
var (
matchPos int
w1, w2 []rune // w1 is the longer of two rune slices
)
w1, w2 = orderByLengthRunes(word1, word2)
for len(w1) > 0 {
w1, w2 = goToMismatch(w1, w2)
if len(w2) == 0 {
remainingMismatches -= len(w1)
break
}
matchPos = indexMatch(w1[0], w2[1:], remainingMismatches)
if matchPos >= 0 {
if fuzzyMatch(w1, w2[1+matchPos:], remainingMismatches-(matchPos+1)) {
return true
}
}
matchPos = indexMatch(w2[0], w1[1:], remainingMismatches)
if matchPos >= 0 {
if fuzzyMatch(w2, w1[1+matchPos:], remainingMismatches-(matchPos+1)) {
return true
}
}
// if both directions fail, dec remainingMismatches and advance 1 in each rune slice
remainingMismatches -= 1
if remainingMismatches < 0 {
return false
}
w1 = w1[1:]
w2 = w2[1:]
}
return remainingMismatches >= 0
}
func MustAtoi(s string) int {
i, err := strconv.Atoi(s)
if err != nil {
panic(err)
}
return i
}
func main() {
word := []rune(os.Args[1])
allowedMismatches := MustAtoi(os.Args[2])
var wordlist, matchingWords []string
wordlist = loadWordList()
start := time.Now()
matchingWords = make([]string, 0, 10)
for _, candidate := range wordlist {
if fuzzyMatch(word, []rune(candidate), allowedMismatches) {
matchingWords = append(matchingWords, candidate)
}
}
end := time.Now()
fmt.Printf("Matches: %v\n", matchingWords)
fmt.Printf("Duration (excluding load dataset from file): %v\n", end.Sub(start))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment