Skip to content

Instantly share code, notes, and snippets.

@DerekChia
Created December 3, 2018 10:33
Show Gist options
  • Save DerekChia/c8682e3d2d469838dcc72bcd27a024d6 to your computer and use it in GitHub Desktop.
Save DerekChia/c8682e3d2d469838dcc72bcd27a024d6 to your computer and use it in GitHub Desktop.
w2v_find_similar_words
# Find similar words: print the 3 vocabulary words with the highest cosine
# similarity to "machine", one "<word> <score>" pair per line.
# NOTE(review): `w2v` is presumably a trained word2vec instance created
# elsewhere -- it is not defined in this snippet; confirm against the caller.
w2v.vec_sim("machine", 3)
class word2vec():
    ## Removed##

    def vec_sim(self, word, top_n):
        """Print the ``top_n`` vocabulary words most similar to ``word``.

        Similarity is the cosine of the angle between the vector returned by
        ``self.word_vec(word)`` and each of the first ``self.v_count`` rows
        of ``self.w1``. Results are printed as ``<word> <similarity>``, one
        per line, in descending order of similarity. The query word itself
        is not excluded from the ranking.

        Args:
            word: Word to look up; passed unchanged to ``self.word_vec``.
            top_n: Maximum number of (word, similarity) pairs to print.

        Returns:
            None. The ranking is written to standard output.
        """
        v_w1 = self.word_vec(word)
        # The query vector's norm does not change between iterations, so it
        # is computed once instead of once per vocabulary entry.
        norm_w1 = np.linalg.norm(v_w1)

        word_sim = {}
        for i in range(self.v_count):
            # Find the similarity score for each word in vocab
            v_w2 = self.w1[i]
            theta_den = norm_w1 * np.linalg.norm(v_w2)
            if theta_den == 0:
                # A zero-length vector has no direction, so 0/0 would yield
                # NaN (plus a RuntimeWarning) and NaN makes the sort below
                # unreliable. Treat it as "no similarity" instead.
                theta = 0.0
            else:
                theta = np.dot(v_w1, v_w2) / theta_den
            # Use a distinct name so the `word` parameter is not shadowed.
            candidate = self.index_word[i]
            word_sim[candidate] = theta

        words_sorted = sorted(word_sim.items(), key=lambda kv: kv[1], reverse=True)
        for candidate, sim in words_sorted[:top_n]:
            print(candidate, sim)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment