Skip to content

Instantly share code, notes, and snippets.

@Venkatstatistics
Created September 20, 2019 14:56
Show Gist options
  • Save Venkatstatistics/3419c479922057641a139f6e9241fb32 to your computer and use it in GitHub Desktop.
Save Venkatstatistics/3419c479922057641a139f6e9241fb32 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from gensim.models.word2vec import Word2Vec
import gensim.downloader as api
#corpus = api.load('word2vec-google-news-300')
#corpus = api.load('glove-wiki-gigaword-100')
#model = api.load('glove-wiki-gigaword-100')
# Download the text8 corpus (via gensim's downloader) and get it back opened
# as an iterable that Word2Vec can consume directly.
corpus = api.load('text8')
# Train a Word2Vec model on the corpus using gensim's default hyperparameters.
model = Word2Vec(corpus)
# Similarity queries go through the KeyedVectors stored in `model.wv`:
# `Word2Vec.most_similar` was deprecated and then removed in gensim 4.0, so
# calling it on the model object itself fails on current gensim releases.
model.wv.most_similar("soccer", topn=3)
# Analogy-style query: words close to (woman + king - man).
model.wv.most_similar(positive=['woman', 'king'], negative=['man'])
def analogy(x1, x2, y1):
    """Complete the analogy ``x1 : x2 :: y1 : ?`` using the module-level model.

    Queries ``model.wv.most_similar`` with ``y1`` and ``x2`` as positive terms
    and ``x1`` as the negative term, then returns the first element of the
    top-ranked entry (the best-matching word).
    """
    ranked = model.wv.most_similar(negative=[x1], positive=[y1, x2])
    top_entry = ranked[0]
    # Each entry is indexed as (word, ...); only the word is returned.
    return top_entry[0]
# Demo: ask the model to complete "china : chinese :: japan : ?".
# NOTE: the return value is not assigned or printed here, so it is only
# visible when this line runs in an interactive session or notebook cell.
analogy('china', 'chinese', 'japan')
import matplotlib.pyplot as plt
import numpy as np
# Switch matplotlib to its 'ggplot' style sheet for the plots drawn below.
plt.style.use('ggplot')
from sklearn.decomposition import PCA
def display_pca_scatterplot(model, words=None, sample=0):
    """Scatter-plot word vectors projected onto their first two PCA components.

    Args:
        model: A trained gensim ``Word2Vec`` model (vectors are read from its
            ``.wv`` attribute) or a ``KeyedVectors``-like object that can be
            indexed by word directly.
        words: Words to plot. When ``None``, words are taken from the model's
            vocabulary as controlled by ``sample``.
        sample: Only used when ``words`` is ``None``. If greater than 0, that
            many words are drawn from the vocabulary with
            ``np.random.choice`` (its default settings, so a word may be drawn
            more than once); otherwise the whole vocabulary is plotted.

    Returns:
        None. The figure is drawn on a new matplotlib figure; this function
        does not call ``plt.show()``.
    """
    # A full Word2Vec model keeps its vectors in `.wv`; `model.vocab` and
    # `model[word]` on the model object itself no longer exist in gensim 4.
    # An object without `.wv` is assumed to already be the vector store.
    vectors = getattr(model, 'wv', model)

    if words is None:
        # gensim >= 4 exposes the vocabulary as `key_to_index`; older releases
        # expose it as `vocab`. Both are mappings keyed by word.
        vocab = getattr(vectors, 'key_to_index', None)
        if vocab is None:
            vocab = vectors.vocab
        if sample > 0:
            words = np.random.choice(list(vocab), sample)
        else:
            words = list(vocab)

    word_vectors = np.array([vectors[w] for w in words])
    # Fit a full PCA and keep only the first two components for plotting.
    twodim = PCA().fit_transform(word_vectors)[:, :2]

    plt.figure(figsize=(6, 6))
    plt.scatter(twodim[:, 0], twodim[:, 1], edgecolors='k', c='r')
    # Label each point, nudged slightly so the text does not sit on the marker.
    for word, (x, y) in zip(words, twodim):
        plt.text(x + 0.05, y + 0.05, word)
# Plot a hand-picked set of words (drinks, food, animals, sports).
display_pca_scatterplot(model,
['coffee', 'tea', 'beer', 'wine','pizza',
'dog', 'horse', 'cat','football','tennis'])
# Plot 100 words drawn at random from the model's vocabulary.
display_pca_scatterplot(model, sample=100)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment