Skip to content

Instantly share code, notes, and snippets.

@hadifar
Last active July 29, 2018 07:11
Show Gist options
  • Select an option

  • Save hadifar/8fbb58da63d381ce81897480aed9513c to your computer and use it in GitHub Desktop.

Select an option

Save hadifar/8fbb58da63d381ce81897480aed9513c to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import logging

import gensim
from gensim.models import KeyedVectors
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
# Configure the root logger at INFO level (presumably so gensim's progress
# messages are shown while training and loading -- confirm if log volume matters).
logging.basicConfig(level=logging.INFO)
# Prefix for the saved vectors file. It is joined to the file name by plain
# string concatenation, so it must end with a path separator.
OUTPUT_FILE_PATH = './'
# Corpus file that train_model() hands to LineSentence.
INPUT_FILE_PATH = './wiki.fa.text'
def train_model():
    """Train a skip-gram Word2Vec model and save its vectors as text.

    Reads the corpus at INPUT_FILE_PATH through LineSentence, trains
    200-dimensional vectors with a context window of 5 (sg=1 selects
    skip-gram), and writes them in word2vec text format to
    OUTPUT_FILE_PATH + 'word2vec.txt'.
    """
    sentences = LineSentence(INPUT_FILE_PATH)
    # gensim 4.0 renamed the dimensionality keyword from `size` to
    # `vector_size`; passing the wrong spelling raises TypeError. Pick the one
    # the installed release accepts so the script runs on both 3.x and 4.x.
    if int(gensim.__version__.split('.')[0]) >= 4:
        dim_kwargs = {'vector_size': 200}
    else:
        dim_kwargs = {'size': 200}
    model = Word2Vec(sentences, window=5, sg=1, **dim_kwargs)
    # Same concatenated path that load_model() reads back.
    model.wv.save_word2vec_format(OUTPUT_FILE_PATH + 'word2vec.txt', binary=False)
def load_model():
    """Load the saved word2vec text vectors and print the neighbours of 'ایران'.

    Reads OUTPUT_FILE_PATH + 'word2vec.txt' with
    KeyedVectors.load_word2vec_format and prints, one per line, the word of
    each entry returned by most_similar for the query.
    """
    vectors_path = OUTPUT_FILE_PATH + 'word2vec.txt'
    vectors = KeyedVectors.load_word2vec_format(vectors_path)
    # Each result is a (word, similarity) pair; only the word is printed.
    for neighbour, _similarity in vectors.most_similar(u'ایران'):
        print(neighbour)
def main():
    """Train the model, then load the saved vectors and print the neighbours."""
    train_model()
    load_model()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment