Last active
March 6, 2017 11:54
-
-
Save manashmandal/bd75db5b8eb6f709b7a4c978027cfcd6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from __future__ import print_function | |
| from gensim.models import KeyedVectors | |
| import numpy as np | |
| from sklearn.manifold import TSNE | |
| import matplotlib.pyplot as plt | |
# Loading the vectors
# [Warning] Parsing the full text-format vector file takes a lot of time.
en_model = KeyedVectors.load_word2vec_format('wiki.en/wiki.en.vec')

# Limit number of tokens to be visualized
limit = 500
vector_dim = 300

# gensim >= 4.0 removed `KeyedVectors.vocab` in favour of `key_to_index`;
# fall back to `vocab` on older releases so the script runs on both.
vocab = getattr(en_model, 'key_to_index', None)
if vocab is None:
    vocab = en_model.vocab

# Getting tokens and vectors
words = []
vectors = []
for word in vocab:
    # Stop as soon as `limit` tokens have been collected
    if len(words) == limit:
        break
    words.append(word)
    # Collect rows in a list; calling np.append per word would copy the
    # whole accumulated array on every iteration.
    vectors.append(en_model[word])

# Build the (n_tokens, vector_dim) matrix in one step. The row count is
# len(words) rather than `limit`, so a vocabulary with fewer than `limit`
# tokens no longer makes the reshape fail. float64 matches the dtype the
# previous np.append-based accumulation produced.
embedding = np.asarray(vectors, dtype=np.float64)
embedding = embedding.reshape(len(words), vector_dim)
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
    """Scatter-plot 2-D points, annotate each with its label, and save it.

    Args:
        low_dim_embs: Array indexed as ``low_dim_embs[i, :]``; row ``i``
            must unpack into the ``(x, y)`` position of ``labels[i]``.
            It needs at least ``len(labels)`` rows.
        labels: Sequence of annotation texts, one per plotted point.
        filename: Destination handed to ``plt.savefig``.

    Raises:
        AssertionError: If there are more labels than embedding rows.
    """
    # Raised explicitly instead of via `assert` so the check still runs
    # under `python -O`; the exception type and message are unchanged.
    if low_dim_embs.shape[0] < len(labels):
        raise AssertionError("More labels than embeddings")

    fig = plt.figure(figsize=(18, 18))  # in inches
    try:
        for i, label in enumerate(labels):
            x, y = low_dim_embs[i, :]
            # One scatter call per point, as before.
            plt.scatter(x, y)
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(5, 2),
                         textcoords='offset points',
                         ha='right',
                         va='bottom')
        plt.savefig(filename)
    finally:
        # pyplot keeps every figure alive until it is closed; release it
        # so repeated calls do not accumulate open figures.
        plt.close(fig)
# Project the embedding matrix down to two dimensions with t-SNE.
# [Warning: will take time]
tsne = TSNE(
    n_components=2,
    perplexity=30.0,
    init='pca',
    n_iter=5000,
)
low_dim_embedding = tsne.fit_transform(embedding)

# Draw the labelled scatter plot and save it under the default filename.
plot_with_labels(low_dim_embs=low_dim_embedding, labels=words)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment