behitek · June 9, 2017 02:58
diff --git a/GensimExample.py b/GensimExample.py
 import sys

 import gensim
 from gensim.models import word2vec


 def w2v(s1, s2, wordmodel):
     """Return a word-embedding similarity score for two sentences.

     Both sentences are tokenised with ``str.split()`` (whitespace only, so
     punctuation stays attached to its word and matching is case-sensitive).

     Args:
         s1: First sentence as a string.
         s2: Second sentence as a string.
         wordmodel: Object providing ``n_similarity(words1, words2)`` and a
             vocabulary mapping, either ``key_to_index`` (gensim >= 4) or
             ``vocab`` (older gensim releases).

     Returns:
         ``1.0`` if the two strings are identical; ``0.0`` if they share no
         token, or if either sentence has no token left after dropping
         out-of-vocabulary words; otherwise the value returned by
         ``wordmodel.n_similarity`` for the in-vocabulary tokens.
     """
     if s1 == s2:
         return 1.0

     s1words = s1.split()
     s2words = s2.split()
     # Sentences without a single common token are scored 0.0 outright,
     # without consulting the model.
     if not set(s1words) & set(s2words):
         return 0.0

     # gensim 4 removed ``vocab`` in favour of ``key_to_index``; accept both.
     vocab = getattr(wordmodel, "key_to_index", None)
     if vocab is None:
         vocab = wordmodel.vocab

     # Filter rather than call list.remove(): remove() deletes only the first
     # occurrence, so a repeated out-of-vocabulary word would survive and
     # reach n_similarity().
     s1words = [word for word in s1words if word in vocab]
     s2words = [word for word in s2words if word in vocab]

     # n_similarity() cannot average an empty word list; treat "nothing left
     # to compare" as no similarity.
     if not s1words or not s2words:
         return 0.0
     return wordmodel.n_similarity(s1words, s2words)


 if __name__ == '__main__':
     # Demo: load pretrained word2vec vectors (binary format) and score two
     # pairs of headlines with w2v().
     wordmodelfile = "C:\\Users\\Hieu Nguyen\\Desktop\\GoogleNews-vectors-negative300.bin.gz"
     wordmodel = gensim.models.KeyedVectors.load_word2vec_format(wordmodelfile, binary=True)
     s1 = "As California Bounces Back , Governor Calls For Lofty Goals"
     s2 = "With California Rebounding, Governor Pushes Big Projects"
     # The arguments must be passed to print(); a bare `print` followed by a
     # tuple on the next line evaluates the tuple and throws it away.
     print("sim(s1,s2) = ", w2v(s1, s2, wordmodel), "/1.")
     s3 = "Special measures for Beijing polution"
     s4 = "Smog cloud blankets Beijing"
     print("sim(s3,s4) = ", w2v(s3, s4, wordmodel), "/1.")
	import sys

	import gensim
	from gensim.models import word2vec


	def w2v(s1, s2, wordmodel):
	    """Return a word-embedding similarity score for two sentences.

	    Both sentences are tokenised with ``str.split()`` (whitespace only, so
	    punctuation stays attached to its word and matching is case-sensitive).

	    Args:
	        s1: First sentence as a string.
	        s2: Second sentence as a string.
	        wordmodel: Object providing ``n_similarity(words1, words2)`` and a
	            vocabulary mapping, either ``key_to_index`` (gensim >= 4) or
	            ``vocab`` (older gensim releases).

	    Returns:
	        ``1.0`` if the two strings are identical; ``0.0`` if they share no
	        token, or if either sentence has no token left after dropping
	        out-of-vocabulary words; otherwise the value returned by
	        ``wordmodel.n_similarity`` for the in-vocabulary tokens.
	    """
	    if s1 == s2:
	        return 1.0

	    s1words = s1.split()
	    s2words = s2.split()
	    # Sentences without a single common token are scored 0.0 outright,
	    # without consulting the model.
	    if not set(s1words) & set(s2words):
	        return 0.0

	    # gensim 4 removed ``vocab`` in favour of ``key_to_index``; accept both.
	    vocab = getattr(wordmodel, "key_to_index", None)
	    if vocab is None:
	        vocab = wordmodel.vocab

	    # Filter rather than call list.remove(): remove() deletes only the first
	    # occurrence, so a repeated out-of-vocabulary word would survive and
	    # reach n_similarity().
	    s1words = [word for word in s1words if word in vocab]
	    s2words = [word for word in s2words if word in vocab]

	    # n_similarity() cannot average an empty word list; treat "nothing left
	    # to compare" as no similarity.
	    if not s1words or not s2words:
	        return 0.0
	    return wordmodel.n_similarity(s1words, s2words)


	if __name__ == '__main__':
	    # Demo: load pretrained word2vec vectors (binary format) and score two
	    # pairs of headlines with w2v().
	    wordmodelfile = "C:\\Users\\Hieu Nguyen\\Desktop\\GoogleNews-vectors-negative300.bin.gz"
	    wordmodel = gensim.models.KeyedVectors.load_word2vec_format(wordmodelfile, binary=True)
	    s1 = "As California Bounces Back , Governor Calls For Lofty Goals"
	    s2 = "With California Rebounding, Governor Pushes Big Projects"
	    # The arguments must be passed to print(); a bare `print` followed by a
	    # tuple on the next line evaluates the tuple and throws it away.
	    print("sim(s1,s2) = ", w2v(s1, s2, wordmodel), "/1.")
	    s3 = "Special measures for Beijing polution"
	    s4 = "Smog cloud blankets Beijing"
	    print("sim(s3,s4) = ", w2v(s3, s4, wordmodel), "/1.")
No results found