Skip to content

Instantly share code, notes, and snippets.

@sagorbrur
Created December 6, 2020 05:41
Show Gist options
  • Save sagorbrur/1ce49bda9a725924431996cfa5e41c9e to your computer and use it in GitHub Desktop.
Save sagorbrur/1ce49bda9a725924431996cfa5e41c9e to your computer and use it in GitHub Desktop.
import itertools
from gensim.models import Word2Vec
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
def tsne_plot(model, vocab, output_path='wordvec.png', font_path=None):
    """Project word vectors to 2-D with t-SNE and save a labelled scatter plot.

    Args:
        model: Object used to look up a vector per word. If it has a ``wv``
            attribute the lookup goes through that attribute; otherwise the
            object itself is indexed by word.
        vocab: Iterable of the words to plot (iterating a dict yields its
            keys, so a word-keyed dict works too).
        output_path: File the figure is saved to.
        font_path: Optional path to a font file used for the word labels,
            e.g. ``'kalpurush.ttf'`` for scripts the default font lacks.

    Raises:
        ValueError: If fewer than two words are supplied.
    """
    # Local import: the file's import block does not bring numpy into scope.
    import numpy as np

    # Direct `model[word]` indexing is not supported by gensim >= 4; the
    # vectors are reached through `.wv`. Fall back to the object itself so a
    # bare keyed-vectors object is accepted as well.
    vectors = getattr(model, "wv", model)

    labels = list(vocab)
    if len(labels) < 2:
        raise ValueError("tsne_plot needs at least two words to build a t-SNE embedding")

    # Hand scikit-learn a real 2-D array rather than a list of vectors.
    tokens = np.asarray([vectors[word] for word in labels])

    tsne_options = dict(
        # Perplexity must stay below the number of samples.
        perplexity=min(40, len(labels) - 1),
        n_components=2,
        init='pca',
        random_state=23,
    )
    try:
        # Newer scikit-learn names the iteration budget `max_iter`.
        tsne_model = TSNE(max_iter=2500, **tsne_options)
    except TypeError:
        # Older releases only know the previous name, `n_iter`.
        tsne_model = TSNE(n_iter=2500, **tsne_options)
    new_values = tsne_model.fit_transform(tokens)

    label_style = {}
    if font_path is not None:
        label_style['fontproperties'] = fm.FontProperties(fname=font_path)

    plt.figure(figsize=(16, 16))
    # One scatter call per point, as before, so every word keeps its own
    # colour from matplotlib's colour cycle.
    for label, (x, y) in zip(labels, new_values):
        plt.scatter(x, y)
        plt.annotate(label,
                     xy=(x, y),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom',
                     **label_style)
    plt.savefig(output_path)
def visualize_word2vec(model, vocab_size=1000):
    """Load a saved Word2Vec model and plot the first ``vocab_size`` words.

    Args:
        model: Path handed to ``Word2Vec.load``.
        vocab_size: Maximum number of vocabulary entries to plot; fewer are
            used when the model's vocabulary is smaller.
    """
    word_vectors = Word2Vec.load(model).wv

    # gensim >= 4 exposes the vocabulary as `key_to_index`; older releases
    # expose it as `vocab`. Both are mappings keyed by word.
    if hasattr(word_vectors, "key_to_index"):
        full_vocab = word_vectors.key_to_index
    else:
        full_vocab = word_vectors.vocab
    print("Total Vocab: ", len(full_vocab))

    vocab = list(itertools.islice(full_vocab, vocab_size))
    # Report what is actually plotted, which can be less than requested.
    print("Using for visualize: ", len(vocab))

    # Pass the keyed vectors themselves so words can be indexed directly.
    tsne_plot(word_vectors, vocab)
if __name__ == "__main__":
    # Script entry point: plot the model saved next to this script using the
    # default vocabulary budget.
    model, vocab_size = "word2vec.model", 1000
    visualize_word2vec(model, vocab_size=vocab_size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment