Last active
December 17, 2015 02:29
-
-
Save whyrusleeping/5536454 to your computer and use it in GitHub Desktop.
Quick fuzzy search that could easily be improved
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"os" | |
) | |
type FuzzMatch struct { | |
str string | |
val int | |
} | |
func FuzzySearch(pattern string, source []string) []FuzzMatch { | |
ret := make([]FuzzMatch, len(source)) | |
i := 0 | |
for _,s := range source { | |
val := fuzzyMatch(pattern, s) | |
if val > 0 { | |
ret[i] = FuzzMatch{s, val} | |
i++ | |
} | |
} | |
mergeSortResults(ret[:i]) | |
return ret[:i] | |
} | |
func fuzzyMatch(pattern, match string) int { | |
//If the pattern is longer than the word, its obviously not a match | |
if len(pattern) > len(match) { | |
return 0 | |
} | |
j := 0 | |
//Check if the letters in pattern occur in the same order in match | |
retVal := 0 | |
var lastMatch bool | |
for i := 0; i < len(match); i++ { | |
if j >= len(pattern) { | |
return retVal | |
} | |
if pattern[j] == match[i] { | |
if i == 0 { | |
retVal += 3 | |
} | |
j++ | |
if lastMatch { | |
retVal += 2 | |
} | |
lastMatch = true | |
retVal++ | |
} else { | |
lastMatch = false | |
} | |
} | |
return retVal | |
} | |
func mergeSortResults(part []FuzzMatch) { | |
if len(part) < 2 { | |
return | |
} | |
mergeSortResults(part[:len(part) / 2]) | |
mergeSortResults(part[len(part) / 2:]) | |
mergeResults(part[:len(part) / 2], part[len(part) / 2:]) | |
} | |
func mergeResults(left, right []FuzzMatch) { | |
temp := make([]FuzzMatch, len(left) + len(right)) | |
li, ri := 0, 0 | |
for i, _ := range temp { | |
if li < len(left) && ri < len(right) { | |
if left[li].val > right[ri].val { | |
temp[i] = left[li] | |
li++ | |
} else { | |
temp[i] = right[ri] | |
ri++ | |
} | |
} else if li < len(left) { | |
temp[i] = left[li] | |
li++ | |
} else { | |
temp[i] = right[ri] | |
ri++ | |
} | |
} | |
left = left[:len(left) + len(right)] | |
for i, m := range temp { | |
left[i] = m | |
} | |
} | |
func LoadWordList(filename string) []string { | |
fi, err := os.Open(filename) | |
if err != nil { | |
panic(err) | |
} | |
retList := make([]string, 128) | |
i := 0 | |
for { | |
//Expand array if need be | |
if i == len(retList) { | |
retList = append(retList, make([]string, len(retList))...) | |
} | |
var s string | |
_, err = fmt.Fscanln(fi, &s) | |
if err != nil { | |
break | |
} | |
retList[i] = s | |
i++ | |
} | |
return retList[:i] | |
} | |
func main() { | |
wl := LoadWordList("brit-a-z.txt") | |
//wl := []string{"hello","fish","frippery","cats"} | |
var patt string | |
fmt.Println("Enter search query:") | |
fmt.Scanf("%s", &patt) | |
finds := FuzzySearch(patt, wl) | |
for _,w := range finds[:20] { | |
fmt.Println(w) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment