Skip to content

Instantly share code, notes, and snippets.

@miku
Last active February 13, 2021 23:23
Show Gist options
  • Save miku/22a6a84a58db012817ac to your computer and use it in GitHub Desktop.
Save miku/22a6a84a58db012817ac to your computer and use it in GitHub Desktop.
Handbaked N-Gram string similarity in Golang.
package main
import "fmt"
import "github.com/juju/utils/set"
// jaccard returns the Jaccard similarity of the sets a and b: the size of
// their intersection divided by the size of their union. The result lies in
// [0, 1].
//
// When both sets are empty the union is empty as well and the ratio would be
// 0/0 (NaN in floating point). In that case 0 is returned: two inputs that
// produced no n-grams at all give no evidence of similarity.
func jaccard(a, b set.Strings) float64 {
	union := a.Union(b).Size()
	if union == 0 {
		return 0
	}
	return float64(a.Intersection(b).Size()) / float64(union)
}
// ngrams returns the set of distinct n-grams of s, where an n-gram is a
// window of n consecutive characters (runes).
//
// The window slides over runes rather than bytes so that multi-byte UTF-8
// characters are never cut in half. An empty set is returned when n is not
// positive or when s holds fewer than n characters.
func ngrams(s string, n int) set.Strings {
	result := set.NewStrings()
	if n <= 0 {
		// A non-positive window size has no meaningful n-grams; a negative n
		// would otherwise produce an inverted slice range and panic.
		return result
	}
	runes := []rune(s)
	for i := 0; i+n <= len(runes); i++ {
		result.Add(string(runes[i : i+n]))
	}
	return result
}
// main prints the trigram sets of two sample strings followed by their
// Jaccard similarity.
func main() {
	a := "Flughafen Leipzig"
	b := "Flughafen zig"

	// Build each trigram set once and reuse it for printing and comparison.
	gramsA := ngrams(a, 3)
	gramsB := ngrams(b, 3)

	fmt.Println(gramsA)
	fmt.Println(gramsB) // previously printed a's trigrams a second time
	fmt.Println(jaccard(gramsA, gramsB))
}
@jftuga
Copy link

jftuga commented Feb 13, 2021

// working example as of 2021-02-13

package main

import (
	"fmt"
	"github.com/juju/collections/set"
)

// jaccard returns the Jaccard similarity of the sets a and b: the size of
// their intersection divided by the size of their union. The result lies in
// [0, 1].
//
// When both sets are empty the union is empty as well and the ratio would be
// 0/0 (NaN in floating point). In that case 0 is returned: two inputs that
// produced no n-grams at all give no evidence of similarity.
func jaccard(a, b set.Strings) float64 {
	union := a.Union(b).Size()
	if union == 0 {
		return 0
	}
	return float64(a.Intersection(b).Size()) / float64(union)
}

// ngrams returns the set of distinct n-grams of s, where an n-gram is a
// window of n consecutive characters (runes).
//
// The window slides over runes rather than bytes so that multi-byte UTF-8
// characters are never cut in half. An empty set is returned when n is not
// positive or when s holds fewer than n characters.
func ngrams(s string, n int) set.Strings {
	result := set.NewStrings()
	if n <= 0 {
		// A non-positive window size has no meaningful n-grams; a negative n
		// would otherwise produce an inverted slice range and panic.
		return result
	}
	runes := []rune(s)
	for i := 0; i+n <= len(runes); i++ {
		result.Add(string(runes[i : i+n]))
	}
	return result
}

// main prints the trigram sets of two sample strings followed by their
// Jaccard similarity.
func main() {
	a := "Flughafen Leipzig"
	b := "Flughafen zig"

	// Build each trigram set once and reuse it for printing and comparison.
	gramsA := ngrams(a, 3)
	gramsB := ngrams(b, 3)

	fmt.Println(gramsA)
	fmt.Println(gramsB) // previously printed a's trigrams a second time
	fmt.Println(jaccard(gramsA, gramsB))
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment