Skip to content

Instantly share code, notes, and snippets.

@whacked
Last active June 20, 2018 08:42
Show Gist options
  • Save whacked/18f450fb879f38eecd964deaa2b749f5 to your computer and use it in GitHub Desktop.
Save whacked/18f450fb879f38eecd964deaa2b749f5 to your computer and use it in GitHub Desktop.
quick and dirty word2vec server/client using flask, jsonrpc, gensim

RUNNING THE SERVER

python
python w2v.py server
go
go run w2v.go

QUERYING THE SERVER:

  • python client:
    against the python server (default port 5001):
    python w2v.py client
    against the go server (port 5002):
    python w2v.py client 5002
  • restclient.el: run this file

NOTES:

python server starts in ~18.7s, queries in ~0.10s.

The go server starts in ~1.8s and answers queries in ~1.8s. The go queries are likely slow because of go-w2v’s looping.

restclient

#

POST http://localhost:5001/rpc
Content-Type: application/json
Content-Length: 145

{ "jsonrpc": "2.0", "method": "App.most_similar", "params": { "positive": ["queen", "man"], "negative": ["king"] }, "id": "1" }

#

POST http://localhost:5002/rpc
Content-Type: application/json
Content-Length: 145

{ "jsonrpc": "2.0", "method": "App.most_similar", "params": { "positive": ["queen", "man"], "negative": ["king"] }, "id": "1" }

package main
import (
"context"
"fmt"
"log"
"net/http"
"os"
"path"
"time"
"github.com/intel-go/fastjson"
"github.com/mattn/go-w2v"
"github.com/osamingo/jsonrpc"
)
// Word2VecSimilarityResult is a single entry of a similarity query: a token
// together with the score it received against the query vector.
type Word2VecSimilarityResult struct {
	// Score is encoded under the JSON key "score".
	Score float64 `json:"score"`
	// Token is encoded under the JSON key "token".
	Token string `json:"token"`
}
// Word2VecQueryHandler is the stateless JSON-RPC handler that main registers
// under the method name "App.most_similar".
type Word2VecQueryHandler struct{}

// Word2VecQueryParams is the parameter object of a similarity query.
type Word2VecQueryParams struct {
	// Positive lists the tokens whose vectors are added to the query vector.
	Positive []string `json:"positive"`
	// Negative lists the tokens whose vectors are subtracted from it.
	Negative []string `json:"negative"`
}

// Word2VecQueryResult is the response object of a similarity query.
type Word2VecQueryResult struct {
	// Status is set to "ok" by ServeJSONRPC.
	Status string `json:"status"`
	// Result holds the similarity entries in the order the search returned them.
	Result []Word2VecSimilarityResult `json:"result"`
}
// ServeJSONRPC answers one "App.most_similar" call. It decodes params into a
// Word2VecQueryParams, runs the query against MainModel asking for at most 10
// entries, and wraps them in a Word2VecQueryResult with status "ok". A decode
// failure is returned to the caller unchanged as the JSON-RPC error.
func (h Word2VecQueryHandler) ServeJSONRPC(c context.Context, params *fastjson.RawMessage) (interface{}, *jsonrpc.Error) {
	var query Word2VecQueryParams
	rpcErr := jsonrpc.Unmarshal(params, &query)
	if rpcErr != nil {
		return nil, rpcErr
	}

	response := Word2VecQueryResult{
		Status: "ok",
		Result: GetWord2VecSimilarityResult(MainModel, query.Positive, query.Negative, 10),
	}
	return response, nil
}
// Now returns the current wall-clock time in seconds since the Unix epoch,
// with the sub-second part carried in the fraction.
func Now() float64 {
	nanos := time.Now().UnixNano()
	return float64(nanos) / 1e9
}
// GetWord2VecSimilarityResult builds a query vector from the given tokens and
// returns up to limit entries of model.CosineSimilars for that vector, in the
// order the model returned them.
//
// The query vector starts as the vector of the first token that resolves;
// every later positive token is combined with Add and every later negative
// token with Sub. Note that if no positive token resolves, the first
// resolvable negative token seeds the query vector un-negated.
//
// Tokens for which model.Find yields nil are skipped (NOTE(review): this
// assumes Find reports out-of-vocabulary words as nil — confirm against
// go-w2v). If no token resolves at all, or limit is not positive, the result
// is an empty, non-nil slice so it encodes as a JSON array rather than null.
func GetWord2VecSimilarityResult(model *w2v.Model, positiveTokenColl []string, negativeTokenColl []string, limit int) []Word2VecSimilarityResult {
	var queryVec *w2v.Vector
	for _, posToken := range positiveTokenColl {
		vec := model.Find(posToken)
		if vec == nil {
			// Never hand a nil vector to Add.
			continue
		}
		if queryVec == nil {
			queryVec = vec
		} else {
			queryVec = queryVec.Add(vec)
		}
	}
	for _, negToken := range negativeTokenColl {
		vec := model.Find(negToken)
		if vec == nil {
			// Never hand a nil vector to Sub.
			continue
		}
		if queryVec == nil {
			queryVec = vec
		} else {
			queryVec = queryVec.Sub(vec)
		}
	}
	if queryVec == nil {
		// Nothing to search for: both lists were empty or no token resolved.
		return []Word2VecSimilarityResult{}
	}

	t0 := Now()
	similarEntryColl := model.CosineSimilars(queryVec)
	tN := Now()
	fmt.Printf("OK: (elapsed %fs) search complete\n", tN-t0)

	// Clamp the number of entries to [0, len(similarEntryColl)] so that a
	// negative limit cannot make the slice expression below panic.
	nTake := limit
	if nTake > len(similarEntryColl) {
		nTake = len(similarEntryColl)
	}
	if nTake < 0 {
		nTake = 0
	}

	out := make([]Word2VecSimilarityResult, 0, nTake)
	for _, entry := range similarEntryColl[:nTake] {
		out = append(out, Word2VecSimilarityResult{
			Score: entry.Value,
			Token: entry.Vector.Word(),
		})
	}
	return out
}
// MainModel is the process-wide word2vec model. main assigns it once after
// loading, and ServeJSONRPC passes it to GetWord2VecSimilarityResult.
var MainModel *w2v.Model
// main loads the word2vec text model from the working directory, prints a
// sample query as a smoke test, then serves the "App.most_similar" JSON-RPC
// method on :5002 at /rpc (with the method repository's debug view at
// /rpc/debug). Any failure to open or load the model, to register the
// method, or to serve is fatal.
func main() {
	modelPath := path.Join("word2vec.6B.50d.txt")
	fmt.Printf("loading: %v\n", modelPath)

	modelFile, err := os.Open(modelPath)
	if err != nil {
		log.Fatalf("Failed to open file %v: %v", modelPath, err)
	}

	t0 := Now()
	model, err := w2v.LoadText(modelFile)
	// The file is only needed while loading. Close it explicitly rather than
	// with defer, because log.Fatal exits without running deferred calls.
	// NOTE(review): assumes LoadText reads the whole file eagerly — confirm.
	if closeErr := modelFile.Close(); closeErr != nil {
		log.Printf("warning: closing %v: %v", modelPath, closeErr)
	}
	if err != nil {
		log.Fatalf("Failed to load model %v: %v", modelPath, err)
	}
	MainModel = &model
	tN := Now()
	fmt.Printf("OK: (elapsed %fs) loaded %v\n", tN-t0, modelPath)

	// Smoke test: run one query and print the top 5 entries.
	positives := []string{"queen", "man"}
	negatives := []string{"king"}
	fmt.Printf("positives: %v\n", positives)
	fmt.Printf("negatives: %v\n", negatives)
	myTest := GetWord2VecSimilarityResult(MainModel, positives, negatives, 5)
	for i, res := range myTest {
		fmt.Printf("sim %v: %v -- %v\n", i, res.Token, res.Score)
	}

	mr := jsonrpc.NewMethodRepository()
	if err := mr.RegisterMethod(
		"App.most_similar",
		Word2VecQueryHandler{},
		Word2VecQueryParams{},
		Word2VecQueryResult{}); err != nil {
		log.Fatalln(err)
	}

	http.Handle("/rpc", mr)
	http.HandleFunc("/rpc/debug", mr.ServeDebug)
	if err := http.ListenAndServe(":5002", http.DefaultServeMux); err != nil {
		log.Fatalln(err)
	}
}
import sys
import os
import os.path as _p
import time
import flask
from flask import Flask, request
from flask_jsonrpc import JSONRPC
from flask_cors import CORS, cross_origin
from jsonrpc_requests import Server as JSONRPCServer
import gensim
from gensim.models import KeyedVectors
from gensim.test.utils import datapath
# Address the Flask server binds to; the client falls back to SERVER_PORT
# when no port is given on the command line.
SERVER_PORT = 5001
SERVER_HOST = 'localhost'
# Flask application, wrapped with flask_cors' CORS.
app = Flask(__name__)
CORS(app)
# JSON-RPC dispatcher mounted on the app at /rpc.
jsonrpc = JSONRPC(app, '/rpc')
# The model file is looked up in the current working directory.
MODEL_FILEPATH = _p.join(os.getcwd(), 'word2vec.6B.50d.txt')
# Assigned the loaded vectors only when the script is started in 'server'
# mode; stays None otherwise.
MODEL = None
@jsonrpc.method('App.most_similar(positive=list, negative=list)')
def App_most_similar(positive, negative):
    """JSON-RPC method: query MODEL.most_similar with the given token lists.

    Returns a dict with 'status' always set to 'ok' and 'result' set to a
    list of {'token': ..., 'score': ...} dicts, one per (token, score) pair
    returned by the model. 'result' is None when MODEL is falsy (not loaded).
    """
    if not MODEL:
        matches = None
    else:
        matches = []
        for token, score in MODEL.most_similar(positive=positive, negative=negative):
            matches.append({'token': token, 'score': score})
    return {
        'status': 'ok',
        'result': matches,
    }
def split(s):
    """Split a comma-separated string into a list of non-empty tokens.

    Each piece is stripped of surrounding whitespace and pieces that end up
    empty are dropped. A real list is returned on both Python 2 and 3, so
    callers can test it for emptiness and render it directly; the previous
    filter(...) call produced a lazy, always-truthy object on Python 3.
    """
    stripped = (token.strip() for token in s.split(','))
    return [token for token in stripped if token]
@app.route('/')
def index():
    """Render a minimal HTML form for trying similarity queries by hand.

    Reads the comma-separated 'positive' and 'negative' request values, runs
    App_most_similar when at least one positive token is present, and returns
    an HTML page that echoes both token lists and one <li> per result entry.
    Everything derived from the request or the model is HTML-escaped before
    it is placed in the page.
    """
    # html.escape on Python 3, cgi.escape on Python 2.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    # list(...) keeps the emptiness check and the rendering below correct
    # even if split() hands back a lazy iterable.
    positive = list(split(request.values.get('positive', '')))
    negative = list(split(request.values.get('negative', '')))
    result = []
    if positive:
        # 'result' is None when no model is loaded; treat that as no matches
        # instead of failing in the row loop below.
        result = App_most_similar(positive, negative)['result'] or []
    return '''\
<h3>hello</h3>
<form>
<input name="positive" type="text" placeholder="positive" autocomplete="off" />
<input name="negative" type="text" placeholder="negative" autocomplete="off" />
<input type="submit" />
</form>
<ul>
<li>positives: %(positive)s</li>
<li>negatives: %(negative)s</li>
</ul>
<ol>
%(rowcoll)s
</ol>
''' % dict(
        positive = escape(str(positive)),
        negative = escape(str(negative)),
        rowcoll = '\n'.join([
            '<li>{}</li>'.format(escape(str(sim)))
            for sim in result
        ])
    )
# Command-line entry point:
#   python w2v.py server          load the model and serve on SERVER_HOST:SERVER_PORT
#   python w2v.py client [PORT]   send one sample query to the server on PORT
#                                 (defaults to SERVER_PORT)
if __name__ == '__main__':
    if 'server' in sys.argv:
        t0 = time.time()
        # MODEL_FILEPATH is already absolute (built from os.getcwd()), so it
        # is passed straight to the loader rather than through gensim's
        # test-data helper `datapath`.
        MODEL = KeyedVectors.load_word2vec_format(MODEL_FILEPATH)
        tN = time.time()
        print('loaded vectors in %.6f s' % (tN - t0))
        app.run(SERVER_HOST, port=SERVER_PORT)
    elif 'client' in sys.argv:
        # A trailing numeric argument selects the port (e.g. 5002 for the go server).
        if sys.argv[-1].isdigit():
            use_port = int(sys.argv[-1])
        else:
            use_port = SERVER_PORT
        jsonrpc_server = JSONRPCServer('http://%s:%s/rpc' % (SERVER_HOST, use_port))
        # non-named argument will fail to work with the go server.
        # Python server is more lenient, this will work:
        # print(jsonrpc_server.App.most_similar(['queen', 'man'], ['king']))
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print(jsonrpc_server.App.most_similar(positive=['python', 'language'], negative=[]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment