Skip to content

Instantly share code, notes, and snippets.

@pranjalAI
Created June 6, 2021 08:07
Show Gist options
  • Save pranjalAI/23a3812fd71a0f7d885516364c1b5a11 to your computer and use it in GitHub Desktop.
Save pranjalAI/23a3812fd71a0f7d885516364c1b5a11 to your computer and use it in GitHub Desktop.
def get_coefs(word, *arr):
    """Turn the fields of one embedding record into a (token, vector) pair.

    Args:
        word: First field of the record; returned unchanged.
        *arr: Remaining fields, one per vector component. Anything
            ``np.asarray`` can convert to float32 is accepted, including
            numeric strings as produced by ``str.split``.

    Returns:
        A ``(word, vector)`` tuple, where ``vector`` is a 1-D float32 array
        holding ``arr`` in the order given.

    Raises:
        ValueError: If a component cannot be converted to a float.
    """
    return word, np.asarray(arr, dtype='float32')
# Build the token -> vector lookup from the pretrained embedding file. Each
# line is split on whitespace: the first field is the token, the rest are the
# vector components. The context manager closes the file as soon as the index
# is built; blank lines are skipped because they would call get_coefs() with
# no token at all.
with open(EMBEDDING_FILE, encoding="utf8") as embedding_file:
    embeddings_index = dict(
        get_coefs(*line.strip().split())
        for line in embedding_file
        if line.strip()
    )

# np.stack requires a real sequence of arrays; a dict view is not one, so
# materialise the values into a list first.
all_embs = np.stack(list(embeddings_index.values()))
emb_mean, emb_std = all_embs.mean(), all_embs.std()

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))

# Initialise every row with noise drawn from the same mean/std as the
# pretrained vectors, so tokens absent from the embedding file still get a
# row on a comparable scale.
embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
for word, i in word_index.items():
    # Bound by the matrix height rather than max_features: when the
    # vocabulary is smaller than max_features, nb_words is the smaller of the
    # two and an index in [nb_words, max_features) would write out of bounds.
    if i >= nb_words:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment