jojonki · December 18, 2017 21:29
diff --git a/load_wowrd2vec_binray_gensim.py b/load_wowrd2vec_binray_gensim.py
 from gensim.models.keyedvectors import KeyedVectors
 # Location of the pretrained vectors, stored in the binary word2vec format.
 model_path = './data/GoogleNews-vectors-negative300.bin'
 # Load from model_path instead of repeating the literal so the two cannot drift apart.
 model = KeyedVectors.load_word2vec_format(model_path, binary=True)
 # Index the loaded vectors directly by word; going through ``model.wv`` fails on
 # gensim 4, where ``KeyedVectors`` no longer has a ``wv`` attribute.
 model['computer']  # array([ 1.07421875e-01, -2.01171875e-01,  1.23046875e-01, ...
 
 # You can use these weights like this.
 def load_embd_weights(word2vec, vocab_size, embd_size, w2i):
     """Build an embedding weight tensor from pretrained word vectors.

     Args:
         word2vec: Source of pretrained vectors. Either an object that
             supports ``word in obj`` and ``obj[word]`` itself, or a model
             exposing such an object through a ``wv`` attribute.
         vocab_size: Number of rows of the returned matrix.
         embd_size: Number of columns of the returned matrix; each looked-up
             vector is assigned to a whole row, so it must fit this width.
         w2i: Mapping from word to its row index in the matrix.

     Returns:
         A float32 CPU tensor of shape ``(vocab_size, embd_size)``. Rows of
         words that are missing from ``word2vec`` stay all-zero.
     """
     # Accept both a model carrying its vectors in ``.wv`` and the bare vector
     # store (gensim 4 removed ``KeyedVectors.wv``), instead of requiring ``.wv``.
     vectors = getattr(word2vec, 'wv', word2vec)

     # Allocate float32 up front: the result is a FloatTensor anyway, so a
     # float64 intermediate would only double the memory of a large vocabulary.
     embedding_matrix = np.zeros((vocab_size, embd_size), dtype=np.float32)
     print('embed_matrix.shape', embedding_matrix.shape)

     found_ct = 0
     for word, idx in w2i.items():
         # Words not found in the pretrained vectors keep their all-zero row.
         if word in vectors:
             embedding_matrix[idx] = vectors[word]
             found_ct += 1
     print(found_ct, 'words are found in word2vec. vocab_size is', vocab_size)

     # from_numpy on a float32 array already yields a CPU float tensor.
     return torch.from_numpy(embedding_matrix)

 # Build the pretrained weight matrix; the helper is named load_embd_weights
 # (the previous call used a misspelled name and raised NameError).
 pre_embd_w = load_embd_weights(model, vocab_size, embd_size, w2i)
 embd = nn.Embedding(vocab_size, embd_size)
 # Replace the layer's weights with the pretrained matrix; is_train_embd
 # decides whether the parameter requires gradients.
 embd.weight = nn.Parameter(pre_embd_w, requires_grad=is_train_embd)
	from gensim.models.keyedvectors import KeyedVectors
	# Location of the pretrained vectors, stored in the binary word2vec format.
	model_path = './data/GoogleNews-vectors-negative300.bin'
	# Load from model_path instead of repeating the literal so the two cannot drift apart.
	model = KeyedVectors.load_word2vec_format(model_path, binary=True)
	# Index the loaded vectors directly by word; going through ``model.wv`` fails on
	# gensim 4, where ``KeyedVectors`` no longer has a ``wv`` attribute.
	model['computer']  # array([ 1.07421875e-01, -2.01171875e-01, 1.23046875e-01, ...

	# You can use these weights like this.
	def load_embd_weights(word2vec, vocab_size, embd_size, w2i):
		"""Build an embedding weight tensor from pretrained word vectors.

		Args:
			word2vec: Source of pretrained vectors. Either an object that
				supports ``word in obj`` and ``obj[word]`` itself, or a model
				exposing such an object through a ``wv`` attribute.
			vocab_size: Number of rows of the returned matrix.
			embd_size: Number of columns of the returned matrix; each
				looked-up vector is assigned to a whole row, so it must fit
				this width.
			w2i: Mapping from word to its row index in the matrix.

		Returns:
			A float32 CPU tensor of shape ``(vocab_size, embd_size)``. Rows of
			words that are missing from ``word2vec`` stay all-zero.
		"""
		# Accept both a model carrying its vectors in ``.wv`` and the bare vector
		# store (gensim 4 removed ``KeyedVectors.wv``), instead of requiring ``.wv``.
		vectors = getattr(word2vec, 'wv', word2vec)

		# Allocate float32 up front: the result is a FloatTensor anyway, so a
		# float64 intermediate would only double the memory of a large vocabulary.
		embedding_matrix = np.zeros((vocab_size, embd_size), dtype=np.float32)
		print('embed_matrix.shape', embedding_matrix.shape)

		found_ct = 0
		for word, idx in w2i.items():
			# Words not found in the pretrained vectors keep their all-zero row.
			if word in vectors:
				embedding_matrix[idx] = vectors[word]
				found_ct += 1
		print(found_ct, 'words are found in word2vec. vocab_size is', vocab_size)

		# from_numpy on a float32 array already yields a CPU float tensor.
		return torch.from_numpy(embedding_matrix)

	# Build the pretrained weight matrix; the helper is named load_embd_weights
	# (the previous call used a misspelled name and raised NameError).
	pre_embd_w = load_embd_weights(model, vocab_size, embd_size, w2i)
	embd = nn.Embedding(vocab_size, embd_size)
	# Replace the layer's weights with the pretrained matrix; is_train_embd
	# decides whether the parameter requires gradients.
	embd.weight = nn.Parameter(pre_embd_w, requires_grad=is_train_embd)
No results found