Created January 18, 2019 05:54
import numpy as np
from tqdm import tqdm

# Vocabulary cap; assumed to be defined globally in the original notebook.
max_features = 120000  # hypothetical value, set to match your tokenizer

def create_glove(word_index, embeddings_index):
    # Mean and std of the GloVe vectors, used to initialise rows for words
    # that have no pretrained vector.
    emb_mean, emb_std = -0.005838499, 0.48782197
    all_embs = np.stack(embeddings_index.values())
    embed_size = all_embs.shape[1]
    # +1 because Keras word indices start at 1; without it, index len(word_index)
    # would fall out of bounds when the vocabulary is smaller than max_features.
    nb_words = min(max_features, len(word_index)) + 1
    # Initialise every row with noise matching the GloVe distribution, so
    # out-of-vocabulary words get plausible vectors instead of zeros.
    embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
    count_found = nb_words
    for word, i in tqdm(word_index.items()):
        if i >= nb_words:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
        elif word.islower():
            # Try the title-cased form if the lowercase word is not present.
            embedding_vector = embeddings_index.get(word.capitalize())
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
            else:
                count_found -= 1
        else:
            count_found -= 1
    print("Got embedding for", count_found, "words.")
    return embedding_matrix
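
A minimal usage sketch follows, assuming the GloVe vectors are first loaded into a word-to-vector dict; the file path and the toy word_index are hypothetical stand-ins (in practice word_index would come from a fitted Keras Tokenizer).

# Load GloVe vectors into a dict of word -> np.ndarray.
# "glove.840B.300d.txt" is a hypothetical path; point it at your local copy.
embeddings_index = {}
with open("glove.840B.300d.txt", encoding="utf8") as f:
    for line in f:
        # rsplit with maxsplit=300 keeps multi-word tokens (present in the
        # 840B file) intact while isolating the 300 vector components.
        values = line.rstrip().rsplit(" ", 300)
        embeddings_index[values[0]] = np.asarray(values[1:], dtype="float32")

# Toy stand-in for tokenizer.word_index (Keras indices start at 1).
word_index = {"the": 1, "quick": 2, "brown": 3, "fox": 4}
embedding_matrix = create_glove(word_index, embeddings_index)
print(embedding_matrix.shape)  # (nb_words, 300) for 300-d GloVe vectors

The resulting matrix can then seed a Keras Embedding layer, e.g. via weights=[embedding_matrix] with trainable=False.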