Skip to content

Instantly share code, notes, and snippets.

@manashmandal
Last active March 6, 2017 11:54
Show Gist options
  • Save manashmandal/bd75db5b8eb6f709b7a4c978027cfcd6 to your computer and use it in GitHub Desktop.
Save manashmandal/bd75db5b8eb6f709b7a4c978027cfcd6 to your computer and use it in GitHub Desktop.
from __future__ import print_function
from gensim.models import KeyedVectors
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
# Load the pre-trained vectors from the word2vec-format text file.
# [Warning] Loading takes a lot of time.
# NOTE(review): `en_model.vocab` used below is the pre-4.0 gensim API;
# gensim 4 exposes the keys as `key_to_index` instead -- confirm the
# installed gensim version before running.
en_model = KeyedVectors.load_word2vec_format('wiki.en/wiki.en.vec')

# Maximum number of tokens to visualize and the expected vector width.
limit = 500
vector_dim = 300

# Collect at most `limit` tokens, in vocabulary iteration order.
words = []
for word in en_model.vocab:
    # Stop as soon as the limit has been reached.
    if len(words) == limit:
        break
    words.append(word)

# Build the matrix in one allocation instead of growing it with np.append
# on every iteration. float64 keeps the result dtype fixed regardless of
# the dtype the model stores its vectors in.
embedding = np.array([en_model[word] for word in words], dtype=np.float64)

# Size the matrix by the number of tokens actually collected, so a
# vocabulary smaller than `limit` still yields a valid (n_words, vector_dim)
# array. The reshape still fails loudly if vectors are not `vector_dim` wide.
embedding = embedding.reshape(len(words), vector_dim)
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
    """Plot labelled 2-D points and save the figure to ``filename``.

    Args:
        low_dim_embs: Array indexed as ``low_dim_embs[i, :]``; each row is
            unpacked into an ``(x, y)`` pair, so rows must hold two values.
        labels: Sequence of labels; ``labels[i]`` annotates row ``i``.
        filename: Destination passed to ``plt.savefig``.

    Raises:
        AssertionError: If there are more labels than embedding rows.
    """
    assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
    plt.figure(figsize=(18, 18))  # in inches
    for i, label in enumerate(labels):
        x, y = low_dim_embs[i, :]
        # One scatter call per point, each followed by its text label.
        plt.scatter(x, y)
        plt.annotate(
            label,
            xy=(x, y),
            xytext=(5, 2),
            textcoords='offset points',
            ha='right',
            va='bottom',
        )
    plt.savefig(filename)
# Project the embeddings down to two dimensions with t-SNE.
# [Warning: will take time]
tsne = TSNE(
    n_components=2,
    perplexity=30.0,
    init='pca',
    n_iter=5000,
)
low_dim_embedding = tsne.fit_transform(embedding)

# Draw the labelled scatter plot and save it under the default filename.
plot_with_labels(low_dim_embedding, words)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment