Created
February 9, 2020 20:35
-
-
Save alessiosavi/42a454f1fbecc341949d97a281f56ef0 to your computer and use it in GitHub Desktop.
A simple PoC for fuzzy search in golang
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"errors" | |
"fmt" | |
"log" | |
"math" | |
"strings" | |
) | |
// Letters that we want to fuzzy search against | |
var data string = `THE RED COW JUMPED OVER THE GREEN CHICKEN-THE RED COW JUMPED OVER THE RED COW-THE RED FOX JUMPED OVER THE BROWN COW` | |
var target string = `THE BROWN FOX JUMPED OVER THE RED COW"` | |
const minDistance float64 = 2 | |
const difference float64 = 1 | |
type word struct { | |
data string | |
letters map[rune]int | |
} | |
type words struct { | |
words []word | |
} | |
// Print prettify the data present in word | |
func (w word) Print() { | |
var ( | |
lenght int | |
c int | |
i int | |
key rune | |
) | |
fmt.Printf("Data: %s\n", w.data) | |
lenght = len(w.letters) - 1 | |
c = 0 | |
for key, i = range w.letters { | |
fmt.Printf("%s:%d", string(key), i) | |
if c != lenght { | |
fmt.Printf(" | ") | |
} | |
c++ | |
} | |
fmt.Printf("\n") | |
} | |
func (ws words) fuzzySearch(data string) ([]word, error) { | |
var ( | |
w word | |
err error | |
founds []word | |
) | |
w, err = initWord(data) | |
if err != nil { | |
log.Printf("Errors: %s\n", err.Error()) | |
return nil, err | |
} | |
// Iterating all the words | |
for i := range ws.words { | |
letters := ws.words[i].letters | |
// | |
var similar float64 = 0 | |
// Iterating the letters of the input data | |
for key := range w.letters { | |
if val, ok := letters[key]; ok { | |
if math.Abs(float64(val-w.letters[key])) <= minDistance { | |
similar += float64(val) | |
} | |
} | |
} | |
lenSimilarity := math.Abs(similar - float64(len(data)-strings.Count(data, " "))) | |
log.Printf("Comparing %s with %s i've found %f similar letter, with weight %f", data, ws.words[i].data, similar, lenSimilarity) | |
if lenSimilarity <= difference { | |
founds = append(founds, ws.words[i]) | |
} | |
} | |
if len(founds) == 0 { | |
return nil, errors.New("no similar found for data: " + data) | |
} | |
return founds, nil | |
} | |
func initWords(data []string) []word { | |
var ( | |
err error | |
words []word | |
word word | |
) | |
for i := range data { | |
word, err = initWord(data[i]) | |
if err != nil { | |
log.Printf("Error in index [%d] for data: %s", i, data[i]) | |
} else { | |
words = append(words, word) | |
} | |
} | |
return words | |
} | |
func initWord(data string) (word, error) { | |
var word word | |
word.data = data | |
word.letters = make(map[rune]int) | |
for _, r := range data { | |
if r != 32 { // avoid to save the whitespace | |
word.letters[r]++ | |
} | |
} | |
return word, nil | |
} | |
func main() { | |
var ws words | |
words := initWords(strings.Split(data, "-")) | |
for i := range words { | |
words[i].Print() | |
} | |
ws.words = words | |
solution, _ := ws.fuzzySearch(target) | |
fmt.Println("Possible solutions: ", solution) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment