Last active
December 30, 2016 02:04
-
-
Save amn41/bf868f1cef01051dabc1f18bc4cf8fcf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def find_similar_words(embed, text, refs, thresh):
    """Return the tokens of *text* whose mean similarity to *refs* exceeds *thresh*.

    Args:
        embed: Object exposing ``W`` (a 2-D array of word vectors, one row
            per word) and ``vocab`` (a mapping from word to its row index
            in ``W``).
        text: String to scan; it is tokenised by splitting on single spaces.
        refs: Sequence of reference words to compare each token against.
        thresh: A token is kept only if its score is strictly greater than
            this value.

    Returns:
        List of matching tokens, in the order they appear in *text*
        (a token that occurs several times is reported each time).

    Side effects:
        Prints the per-token score list (0.0 for out-of-vocabulary tokens).

    NOTE(review): the score is a plain dot product, so it is a cosine
    similarity only if the rows of ``embed.W`` are unit-normalised --
    confirm against the code that builds ``embed``.
    """
    vocab = embed.vocab
    W = embed.W

    # One row per reference word. References missing from the vocabulary
    # keep an all-zero row, so they contribute 0 to the mean below and pull
    # every score towards zero.
    C = np.zeros((len(refs), W.shape[1]))
    for idx, term in enumerate(refs):
        if term in vocab:
            C[idx, :] = W[vocab[term], :]

    tokens = text.split(' ')
    scores = [0.] * len(tokens)
    found = []
    for idx, term in enumerate(tokens):
        if term not in vocab:
            # Unknown tokens keep their default score of 0 and never match.
            continue
        vec = W[vocab[term], :]
        # Similarity of this token to every reference word, averaged.
        score = np.mean(np.dot(C, vec.T))
        scores[idx] = score
        if score > thresh:
            found.append(term)

    # Function-call form works on both Python 2 and Python 3.
    print(scores)
    return found
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment