Venkatstatistics · August 10, 2019 16:33
diff --git a/spacy word similarity b/spacy word similarity
 def process():
    """Score queued keywords against the topic list and publish the best matches.

    Loads the ``en_vectors_web_lg`` vectors and the ``Topic`` column of
    ``small_Topics.csv``, then repeatedly pops a keyword from the
    ``big_keywords`` list of the module-level client ``r``. For each keyword,
    every topic is scored with ``Doc.similarity`` and staged in a temporary
    sorted set; the top ``SIMILAR_COUNT`` topics are pushed to the ``results``
    list as ``keyword,topic1,topic2,...`` and printed, and the temporary set
    is deleted.

    The loop ends when the queue is empty or an empty keyword is popped.
    """
    import en_vectors_web_lg
    nlp = en_vectors_web_lg.load()

    topicdf = pd.read_csv("small_Topics.csv", encoding='Latin-1')
    topics = topicdf.Topic.tolist()
    # Topics do not change between keywords, so parse each one once instead
    # of once per (keyword, topic) pair.
    topic_docs = [(topic, nlp(str(topic))) for topic in topics]

    while True:
        raw_keyword = r.lpop('big_keywords')
        # LPOP yields None once the list is drained; decoding it directly
        # would raise AttributeError instead of ending the loop.
        if raw_keyword is None:
            break
        # Tolerate clients that already return str (e.g. decode_responses).
        if isinstance(raw_keyword, bytes):
            big_keyword = raw_keyword.decode('utf-8')
        else:
            big_keyword = raw_keyword
        if not big_keyword:
            break

        key = 'keyword_score###{}'.format(big_keyword)
        prefix = '{}###'.format(big_keyword)
        keyword_doc = nlp(str(big_keyword))
        for topic, topic_doc in topic_docs:
            score = keyword_doc.similarity(topic_doc)
            elem = "{}###{}###{}".format(big_keyword, topic, score)
            # similarity() may hand back a NumPy scalar (TODO confirm for the
            # installed spaCy); a plain float is always a valid ZADD score.
            r.zadd(key, {elem: float(score)})

        n_result = r.zrevrangebyscore(key, "+inf", "-inf", start=0, num=SIMILAR_COUNT)

        # Recover the topic by stripping the known keyword prefix and the
        # trailing score, so a '###' inside the keyword or topic cannot shift
        # the fields. The loop variable is not named `r`, to avoid shadowing
        # the Redis client.
        best_topics = []
        for member in n_result:
            text = member.decode('utf-8') if isinstance(member, bytes) else member
            best_topics.append(text[len(prefix):].rsplit('###', 1)[0])
        result = [big_keyword] + best_topics

        line = ','.join(result)
        r.lpush('results', line)
        r.delete(key)
        print(line)
	def process():
		"""Score queued keywords against the topic list and publish the best matches.

		Loads the ``en_vectors_web_lg`` vectors and the ``Topic`` column of
		``small_Topics.csv``, then repeatedly pops a keyword from the
		``big_keywords`` list of the module-level client ``r``. For each
		keyword, every topic is scored with ``Doc.similarity`` and staged in a
		temporary sorted set; the top ``SIMILAR_COUNT`` topics are pushed to
		the ``results`` list as ``keyword,topic1,topic2,...`` and printed, and
		the temporary set is deleted.

		The loop ends when the queue is empty or an empty keyword is popped.
		"""
		import en_vectors_web_lg
		nlp = en_vectors_web_lg.load()

		topicdf = pd.read_csv("small_Topics.csv", encoding='Latin-1')
		topics = topicdf.Topic.tolist()
		# Topics do not change between keywords, so parse each one once
		# instead of once per (keyword, topic) pair.
		topic_docs = [(topic, nlp(str(topic))) for topic in topics]

		while True:
			raw_keyword = r.lpop('big_keywords')
			# LPOP yields None once the list is drained; decoding it directly
			# would raise AttributeError instead of ending the loop.
			if raw_keyword is None:
				break
			# Tolerate clients that already return str (e.g. decode_responses).
			if isinstance(raw_keyword, bytes):
				big_keyword = raw_keyword.decode('utf-8')
			else:
				big_keyword = raw_keyword
			if not big_keyword:
				break

			key = 'keyword_score###{}'.format(big_keyword)
			prefix = '{}###'.format(big_keyword)
			keyword_doc = nlp(str(big_keyword))
			for topic, topic_doc in topic_docs:
				score = keyword_doc.similarity(topic_doc)
				elem = "{}###{}###{}".format(big_keyword, topic, score)
				# similarity() may hand back a NumPy scalar (TODO confirm for
				# the installed spaCy); a plain float is always a valid score.
				r.zadd(key, {elem: float(score)})

			n_result = r.zrevrangebyscore(key, "+inf", "-inf", start=0, num=SIMILAR_COUNT)

			# Recover the topic by stripping the known keyword prefix and the
			# trailing score, so a '###' inside the keyword or topic cannot
			# shift the fields. The loop variable is not named `r`, to avoid
			# shadowing the Redis client.
			best_topics = []
			for member in n_result:
				text = member.decode('utf-8') if isinstance(member, bytes) else member
				best_topics.append(text[len(prefix):].rsplit('###', 1)[0])
			result = [big_keyword] + best_topics

			line = ','.join(result)
			r.lpush('results', line)
			r.delete(key)
			print(line)
No results found