Last active
March 6, 2025 22:44
-
-
Save nithyadurai87/4ee7536eb91eed7980eac20fc8531e7a to your computer and use it in GitHub Desktop.
04_word2vec.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train a tiny skip-gram Word2Vec model on a short English paragraph and
# print a few lookups (vocabulary, one vector, a similarity, nearest words).
#
# Requires the NLTK data packages used below to be installed beforehand,
# e.g. nltk.download('punkt') and nltk.download('stopwords').
from gensim.models import word2vec
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

paragraph = (
    "Periyar was a social reformer in Tamil Nadu. "
    "He founded the Self-Respect Movement. "
    "This movement aimed to promote equality and end caste discrimination. "
    "Today, he is celebrated as a key figure in the fight for social justice "
    "and equality in Tamil Nadu"
)

# Naive sentence split on '.'; drop blank fragments so a trailing period
# does not produce an empty training sentence.
sentences = [part.strip() for part in paragraph.split('.') if part.strip()]

# Load the stop-word list once, as a set, instead of re-reading it for
# every token.
english_stopwords = set(stopwords.words('english'))

# One token list per sentence, with stop words removed (case-insensitive).
tokenized_sentences = [
    [token for token in word_tokenize(sentence)
     if token.lower() not in english_stopwords]
    for sentence in sentences
]

# sg=1 selects skip-gram; min_count=1 keeps every token because the corpus
# is only a handful of sentences.
model = word2vec.Word2Vec(
    tokenized_sentences,
    window=10,
    vector_size=5,
    min_count=1,
    sg=1,
    sample=1e-3,
)

print(model.wv.index_to_key)
print(model.wv['Periyar'])
print(model.wv.similarity('Self-Respect', 'equality'))
print(model.wv.most_similar(positive=['Periyar']))
# word_tokenize keeps the hyphenated compound as a single token, so the
# vocabulary contains 'Self-Respect' and not 'Self'; querying 'Self' would
# raise KeyError.
print(model.wv.most_similar(positive=['Self-Respect']))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment