Skip to content

Instantly share code, notes, and snippets.

@MLWhiz
Created January 18, 2019 05:54
Show Gist options
  • Save MLWhiz/b3e15c91d772b08e1e1b87de06665bf4 to your computer and use it in GitHub Desktop.
Save MLWhiz/b3e15c91d772b08e1e1b87de06665bf4 to your computer and use it in GitHub Desktop.
def create_glove(word_index, embeddings_index):
    """Build an embedding matrix for the vocabulary in ``word_index``.

    Every row is first drawn from a normal distribution and then overwritten
    with the pretrained vector of the corresponding word when one exists in
    ``embeddings_index``. For an all-lowercase word with no direct match, the
    capitalized form (``word.capitalize()``) is tried as a fallback.

    Args:
        word_index: Mapping from word to integer row index.
        embeddings_index: Mapping from word to its embedding vector. All
            vectors are expected to be 1-D and of the same length.

    Returns:
        A ``(nb_words, embed_size)`` NumPy array, where ``nb_words`` is
        ``min(max_features, len(word_index))`` and ``embed_size`` is the
        length of the vectors in ``embeddings_index``.

    Raises:
        ValueError: If ``embeddings_index`` is empty, since the embedding
            dimension cannot be determined.

    Note:
        Reads the module-level ``max_features`` and prints the number of
        words for which a pretrained vector was found.
    """
    # Parameters of the normal distribution used for rows without a
    # pretrained vector (presumably the mean/std of the GloVe vectors --
    # TODO confirm against the embedding file actually used).
    emb_mean, emb_std = -0.005838499, 0.48782197

    if not embeddings_index:
        raise ValueError("embeddings_index is empty")

    # Only the vector length is needed, so read it from the first entry
    # instead of stacking every embedding into one huge temporary array.
    first_vector = next(iter(embeddings_index.values()))
    embed_size = np.asarray(first_vector).shape[0]

    nb_words = min(max_features, len(word_index))
    embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))

    count_found = 0
    for word, i in tqdm(word_index.items()):
        # Guard against the matrix's real row count: when the vocabulary is
        # smaller than max_features, an index equal to len(word_index)
        # (possible with 1-based indices) would otherwise be out of bounds.
        if i >= nb_words:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is None and word.islower():
            # Try the capitalized form if the lowercase word is not present.
            embedding_vector = embeddings_index.get(word.capitalize())
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
            count_found += 1
    print("Got embedding for ",count_found," words.")
    return embedding_matrix
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment