mmahbub · December 10, 2020 20:31
diff --git a/tsne-plot b/tsne-plot
 def tsnescatterplot(model, word, list_names, size):
    """ Plot in seaborn the results from the t-SNE dimensionality reduction algorithm of the vectors of a query word,
    its list of most similar words, and a list of words.
    """
    arrays = np.empty((0, size), dtype='f')
    word_labels = [word]
    color_list  = ['red']

    # adds the vector of the query word
    arrays = np.append(arrays, model.wv.__getitem__([word]), axis=0)
    
    # gets list of most similar words
    close_words = model.wv.most_similar([word])
    
    # adds the vector for each of the closest words to the array
    for wrd_score in close_words:
        wrd_vector = model.wv.__getitem__([wrd_score[0]])
        word_labels.append(wrd_score[0])
        color_list.append('blue')
        arrays = np.append(arrays, wrd_vector, axis=0)
    
    # adds the vector for each of the words from list_names to the array
    for wrd in list_names:
        wrd_vector = model.wv.__getitem__([wrd])
        word_labels.append(wrd)
        color_list.append('green')
        arrays = np.append(arrays, wrd_vector, axis=0)
        
    # Reduces the dimensionality from 300 or 50 to 21 dimensions with PCA
    reduc = PCA(n_components=21).fit_transform(arrays)
    
    # Finds t-SNE coordinates for 2 dimensions
    np.set_printoptions(suppress=True)
    
    Y = TSNE(n_components=2, random_state=0, perplexity=15).fit_transform(reduc)
    
    # Sets everything up to plot
    df = pd.DataFrame({'x': [x for x in Y[:, 0]],
                       'y': [y for y in Y[:, 1]],
                       'words': word_labels,
                       'color': color_list})
    
    fig, _ = plt.subplots()
    fig.set_size_inches(9, 9)
    
    # Basic plot
    p1 = sns.regplot(data=df,
                     x="x",
                     y="y",
                     fit_reg=False,
                     marker="o",
                     scatter_kws={'s': 40,
                                  'facecolors': df['color']
                                 }
                    )
    
    # Adds annotations one by one with a loop
    for line in range(0, df.shape[0]):
         p1.text(df["x"][line],
                 df['y'][line],
                 '  ' + df["words"][line].title(),
                 horizontalalignment='left',
                 verticalalignment='bottom', size='medium',
                 color=df['color'][line],
                 weight='normal'
                ).set_size(15)
    
    plt.xlim(Y[:, 0].min()-50, Y[:, 0].max()+50)
    plt.ylim(Y[:, 1].min()-50, Y[:, 1].max()+50)
            
    plt.title('t-SNE visualization for {}'.format(word.title()))
	def tsnescatterplot(model, word, list_names, size):
	""" Plot in seaborn the results from the t-SNE dimensionality reduction algorithm of the vectors of a query word,
	its list of most similar words, and a list of words.
	"""
	arrays = np.empty((0, size), dtype='f')
	word_labels = [word]
	color_list = ['red']

	# adds the vector of the query word
	arrays = np.append(arrays, model.wv.__getitem__([word]), axis=0)

	# gets list of most similar words
	close_words = model.wv.most_similar([word])

	# adds the vector for each of the closest words to the array
	for wrd_score in close_words:
	wrd_vector = model.wv.__getitem__([wrd_score[0]])
	word_labels.append(wrd_score[0])
	color_list.append('blue')
	arrays = np.append(arrays, wrd_vector, axis=0)

	# adds the vector for each of the words from list_names to the array
	for wrd in list_names:
	wrd_vector = model.wv.__getitem__([wrd])
	word_labels.append(wrd)
	color_list.append('green')
	arrays = np.append(arrays, wrd_vector, axis=0)

	# Reduces the dimensionality from 300 or 50 to 21 dimensions with PCA
	reduc = PCA(n_components=21).fit_transform(arrays)

	# Finds t-SNE coordinates for 2 dimensions
	np.set_printoptions(suppress=True)

	Y = TSNE(n_components=2, random_state=0, perplexity=15).fit_transform(reduc)

	# Sets everything up to plot
	df = pd.DataFrame({'x': [x for x in Y[:, 0]],
	'y': [y for y in Y[:, 1]],
	'words': word_labels,
	'color': color_list})

	fig, _ = plt.subplots()
	fig.set_size_inches(9, 9)

	# Basic plot
	p1 = sns.regplot(data=df,
	x="x",
	y="y",
	fit_reg=False,
	marker="o",
	scatter_kws={'s': 40,
	'facecolors': df['color']
	}
	)

	# Adds annotations one by one with a loop
	for line in range(0, df.shape[0]):
	p1.text(df["x"][line],
	df['y'][line],
	' ' + df["words"][line].title(),
	horizontalalignment='left',
	verticalalignment='bottom', size='medium',
	color=df['color'][line],
	weight='normal'
	).set_size(15)

	plt.xlim(Y[:, 0].min()-50, Y[:, 0].max()+50)
	plt.ylim(Y[:, 1].min()-50, Y[:, 1].max()+50)

	plt.title('t-SNE visualization for {}'.format(word.title()))
No results found