Skip to content

Instantly share code, notes, and snippets.

@ryszard
Created April 1, 2012 18:37
Show Gist options
  • Save ryszard/2277595 to your computer and use it in GitHub Desktop.
Save ryszard/2277595 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"math"
"flag"
"strings"
)
type Ngrams struct {
grams map[string]float64
n int
}
func NewNgrams(s string, n int) *Ngrams {
ngrams := Ngrams{make(map[string]float64), n}
padded := fmt.Sprintf("%s%s", strings.Repeat("$", n), s)
for i := 0; i <= len(padded)-n; i++ {
ngrams.grams[padded[i:i+n]] += 1.0
}
ngrams.normalize()
return &ngrams
}
func compare(left, right *Ngrams) (result float64) {
for k, v := range left.grams {
result += right.grams[k] * v
}
return
}
func Compare(left, right string, n int) float64 {
if left == right {
return 1.0
}
return compare(NewNgrams(left, n), NewNgrams(right, n))
}
func (n *Ngrams) normalize() {
var sum float64
for _, v := range n.grams {
sum += math.Pow(v, 2)
}
norm := math.Sqrt(sum)
for k := range n.grams {
n.grams[k] /= norm
}
}
func main() {
n := flag.Int("n", 3, "length of ngram to be used.")
flag.Parse()
if l := len(flag.Args()); l != 2 {
panic("You need to provide exactly two strings to compare")
}
for i := 0; i < 100000; i++ {
Compare(flag.Arg(0), flag.Arg(1), *n)
}
fmt.Println(Compare(flag.Arg(0), flag.Arg(1), *n))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment