Skip to content

Instantly share code, notes, and snippets.

@nfisher
Last active January 13, 2018 01:09
Show Gist options
  • Save nfisher/7125a2daeb51961cd34de4175fba8043 to your computer and use it in GitHub Desktop.
Save nfisher/7125a2daeb51961cd34de4175fba8043 to your computer and use it in GitHub Desktop.
Using James Bowman's nlp package (github.com/james-bowman/nlp).
package search
import (
	"errors"
	"math"

	"github.com/james-bowman/nlp"
	"github.com/james-bowman/nlp/measures/pairwise"
	"gonum.org/v1/gonum/mat"
)
// Index holds the stages of an nlp processing pipeline together with the
// matrix produced by fitting that pipeline to a corpus.
type Index struct {
	// vectoriser is the count vectoriser stage.
	vectoriser *nlp.CountVectoriser

	// transformer is the TF-IDF weighting stage.
	transformer *nlp.TfidfTransformer

	// reducer is the truncated SVD stage.
	reducer *nlp.TruncatedSVD

	// pipeline is what Train and Query call to fit and transform documents.
	pipeline *nlp.Pipeline

	// lsi is the matrix stored by Train and scanned column by column in Query.
	lsi mat.Matrix
}
// New returns an untrained Index whose pipeline chains a count vectoriser, a
// TF-IDF transformer and a truncated SVD reducer, in that order.
//
// removeStopwords is forwarded to nlp.NewCountVectoriser and k is forwarded to
// nlp.NewTruncatedSVD (presumably the number of dimensions to keep; confirm
// against the nlp package documentation).
func New(removeStopwords bool, k int) *Index {
	vectoriser := nlp.NewCountVectoriser(removeStopwords)
	transformer := nlp.NewTfidfTransformer()
	reducer := nlp.NewTruncatedSVD(k)

	// Train and Query both call methods on the pipeline, so it has to be built
	// here; leaving the field nil makes them dereference a nil *nlp.Pipeline.
	pipeline := nlp.NewPipeline(vectoriser, transformer, reducer)

	return &Index{
		vectoriser:  vectoriser,
		transformer: transformer,
		reducer:     reducer,
		pipeline:    pipeline,
	}
}
// Train fits the pipeline to testCorpus and keeps the resulting matrix so that
// Query can compare against it.
//
// Any error from the pipeline's FitTransform is returned unchanged, and in
// that case the previously stored matrix is left as it was.
func (idx *Index) Train(testCorpus []string) error {
	fitted, err := idx.pipeline.FitTransform(testCorpus...)
	if err == nil {
		idx.lsi = fitted
	}
	return err
}
// Query returns the column number of the trained document that is most similar
// to query.
//
// The query is transformed by the pipeline and its first column is compared,
// using cosine similarity, with every column of the matrix stored by Train.
// The column with the highest score wins; on a tie the earliest column is kept.
//
// It returns -1 and a non-nil error when the pipeline is missing, when Train
// has not stored a matrix yet, when the pipeline fails to transform the query,
// when either matrix cannot be viewed by column, or when no column yields a
// comparable (non-NaN) score, which includes an index with zero columns.
func (idx *Index) Query(query string) (int, error) {
	if idx.pipeline == nil {
		return -1, errors.New("search: index pipeline is not initialised")
	}
	if idx.lsi == nil {
		return -1, errors.New("search: index has not been trained")
	}

	queryVector, err := idx.pipeline.Transform(query)
	if err != nil {
		return -1, err
	}

	// Resolve the column views once, with checked assertions, rather than
	// re-asserting (and possibly panicking) on every loop iteration.
	queryCols, ok := queryVector.(mat.ColViewer)
	if !ok {
		return -1, errors.New("search: query matrix does not support column views")
	}
	docCols, ok := idx.lsi.(mat.ColViewer)
	if !ok {
		return -1, errors.New("search: index matrix does not support column views")
	}
	queryCol := queryCols.ColView(0)

	// matched stays at -1 until a real score is seen, so an empty index or a
	// run of NaN scores is never reported as "document 0".
	matched := -1
	var highestSimilarity float64
	_, docs := idx.lsi.Dims()
	for i := 0; i < docs; i++ {
		similarity := pairwise.CosineSimilarity(queryCol, docCols.ColView(i))
		if math.IsNaN(similarity) {
			continue
		}
		if matched < 0 || similarity > highestSimilarity {
			matched = i
			highestSimilarity = similarity
		}
	}
	if matched < 0 {
		return -1, errors.New("search: no comparable document for query")
	}
	return matched, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment