Skip to content

Instantly share code, notes, and snippets.

@Venkatstatistics
Last active August 10, 2019 16:33
Show Gist options
  • Save Venkatstatistics/d15fec2e5e01bc2a59b64a2e0f18b876 to your computer and use it in GitHub Desktop.
Save Venkatstatistics/d15fec2e5e01bc2a59b64a2e0f18b876 to your computer and use it in GitHub Desktop.
def process():
    """Drain the 'big_keywords' list and publish the closest topics for each keyword.

    For every keyword popped from the 'big_keywords' list of the module-level
    client ``r`` (presumably a redis-py client without ``decode_responses``,
    since replies are decoded here -- confirm against the rest of the file),
    the keyword is compared with every entry of the ``Topic`` column of
    ``small_Topics.csv``.  The ``SIMILAR_COUNT`` highest-scoring topics are
    selected through a temporary sorted set, pushed onto the 'results' list
    as ``keyword,topic1,topic2,...`` and printed.

    The loop ends when the list is empty (the pop returns nothing) or when an
    empty keyword is popped.
    """
    # Imported here, as in the original, so the vectors package is only
    # required when this function actually runs.
    import en_vectors_web_lg

    nlp = en_vectors_web_lg.load()
    topicdf = pd.read_csv("small_Topics.csv", encoding='Latin-1')
    topics = topicdf.Topic.tolist()
    # Topics never change between keywords: parse each of them exactly once.
    topic_docs = [(topic, nlp(str(topic))) for topic in topics]

    while True:
        raw_keyword = r.lpop('big_keywords')
        # An exhausted list yields None; decoding it unconditionally would
        # raise AttributeError instead of ending the loop.  An empty payload
        # also stops the loop, as before.
        if not raw_keyword:
            break
        big_keyword = raw_keyword.decode('utf-8')

        key = 'keyword_score###{}'.format(big_keyword)
        # The keyword document is invariant across topics: build it once.
        keyword_doc = nlp(str(big_keyword))
        for topic, topic_doc in topic_docs:
            score = keyword_doc.similarity(topic_doc)
            elem = "{}###{}###{}".format(big_keyword, topic, score)
            r.zadd(key, {elem: score})

        n_result = r.zrevrangebyscore(key, "+inf", "-inf", start=0, num=SIMILAR_COUNT)

        # Recover the topic by stripping the known keyword prefix and the
        # trailing score, so a '###' inside the keyword or topic cannot shift
        # the fields.  The loop variable is deliberately not named ``r`` to
        # avoid shadowing the client.
        prefix = "{}###".format(big_keyword)
        result = [big_keyword]
        for member in n_result:
            entry = member.decode('utf-8')
            result.append(entry[len(prefix):].rsplit('###', 1)[0])

        line = ','.join(result)
        r.lpush('results', line)
        r.delete(key)
        print(line)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment