Created
December 6, 2020 05:41
-
-
Save sagorbrur/1ce49bda9a725924431996cfa5e41c9e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
from gensim.models import Word2Vec | |
from sklearn.manifold import TSNE | |
import matplotlib.pyplot as plt | |
import matplotlib.font_manager as fm | |
def tsne_plot(model, vocab):
    """Project word vectors to 2-D with t-SNE and save a labelled scatter plot.

    Args:
        model: A gensim Word2Vec model (vectors are read from ``model.wv``),
            or any object that returns a word's vector via ``obj[word]``.
        vocab: Iterable of words to plot. Each word is looked up in the
            model, so it must be present there.

    Side effects:
        Draws on a new 16x16 matplotlib figure and writes it to
        ``wordvec.png`` in the current working directory.
    """
    # Indexing the model object directly (``model[word]``) was deprecated and
    # later removed from gensim; the vectors live on ``model.wv``. Falling back
    # to ``model`` itself keeps plain keyed-vector objects working too.
    vectors = getattr(model, "wv", model)

    # Materialize once so one-shot iterables are supported and the labels are
    # guaranteed to line up with the vectors.
    labels = list(vocab)
    tokens = [vectors[word] for word in labels]

    # scikit-learn requires perplexity < n_samples; clamp so that small
    # vocabularies do not fail. Larger vocabularies keep the original 40.
    perplexity = min(40, max(1, len(tokens) - 1))

    # NOTE(review): recent scikit-learn releases rename ``n_iter`` to
    # ``max_iter`` -- confirm against the installed version before upgrading.
    tsne_model = TSNE(perplexity=perplexity, n_components=2, init='pca',
                      n_iter=2500, random_state=23)
    new_values = tsne_model.fit_transform(tokens)

    # To render labels with a custom font, build
    #     prop = fm.FontProperties(fname='kalpurush.ttf')
    # and pass ``fontproperties=prop`` to ``plt.annotate`` below.
    plt.figure(figsize=(16, 16))
    for label, (x, y) in zip(labels, new_values):
        # One scatter call per point, as before, so each word gets its own
        # colour from matplotlib's colour cycle.
        plt.scatter(x, y)
        plt.annotate(label,
                     xy=(x, y),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')

    # plt.show()
    plt.savefig('wordvec.png')
def visualize_word2vec(model, vocab_size=1000):
    """Load a saved Word2Vec model and plot its first ``vocab_size`` words.

    Args:
        model: Path to a model file previously written by gensim's
            ``Word2Vec.save``.
        vocab_size: Maximum number of vocabulary entries to plot, taken in
            the vocabulary mapping's iteration order.

    Side effects:
        Prints the vocabulary size and the number of words plotted, then
        calls ``tsne_plot`` with the loaded model and the selected words.
    """
    # Keep the path and the loaded model in separate names instead of
    # rebinding the ``model`` parameter to a different type.
    w2v = Word2Vec.load(model)
    keyed_vectors = w2v.wv

    # gensim 4 replaced ``wv.vocab`` with ``wv.key_to_index``; older releases
    # only have ``vocab``. Both are mappings keyed by word.
    vocab = getattr(keyed_vectors, "key_to_index", None)
    if vocab is None:
        vocab = keyed_vectors.vocab
    print("Total Vocab: ", len(vocab))

    words = list(itertools.islice(vocab, vocab_size))
    # Report how many words are actually plotted, which can be fewer than
    # ``vocab_size`` for a small model.
    print("Using for visualize: ", len(words))
    tsne_plot(w2v, words)
if __name__ == "__main__":
    # Script entry point: visualize the first 1000 words of the model saved
    # as "word2vec.model" in the current working directory.
    model = "word2vec.model"
    vocab_size = 1000
    visualize_word2vec(model=model, vocab_size=vocab_size)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment