Skip to content

Instantly share code, notes, and snippets.

@pranjalAI
Created September 4, 2020 14:30
Show Gist options
  • Save pranjalAI/eeca564bb8b3f489cc4572e54354d43d to your computer and use it in GitHub Desktop.
Save pranjalAI/eeca564bb8b3f489cc4572e54354d43d to your computer and use it in GitHub Desktop.
def emb_mat(nb_words, embedding_file="glove.6B.100d.txt"):
    """Build an embedding matrix from a GloVe-style text file.

    Each non-blank line of ``embedding_file`` is split on whitespace; the
    first token is the word and the remaining tokens are parsed as float32
    coefficients. The matrix is first filled with samples from a normal
    distribution whose mean and standard deviation are taken over all loaded
    coefficients, then rows are overwritten with the loaded vector for every
    word in ``word_index`` that has one.

    Relies on names defined outside this function: ``np`` (NumPy),
    ``word_index`` (mapping of word -> integer row index), ``max_features``
    and ``embed_size``.

    Args:
        nb_words: Number of vocabulary rows; the result has ``nb_words + 1``
            rows.
        embedding_file: Path of the embeddings text file. Defaults to the
            file name that was previously hard-coded.

    Returns:
        A NumPy array of shape ``(nb_words + 1, embed_size)``.
    """
    embeddings_index = {}
    # Context manager so the file handle is closed even if parsing fails.
    with open(embedding_file, encoding="utf8") as handle:
        for line in handle:
            fields = line.strip().split()
            if not fields:
                # Tolerate blank lines instead of failing on a missing word.
                continue
            embeddings_index[fields[0]] = np.asarray(fields[1:], dtype='float32')

    # np.stack needs a real sequence; a dict view is rejected by current NumPy.
    all_embs = np.stack(list(embeddings_index.values()))
    emb_mean, emb_std = all_embs.mean(), all_embs.std()

    # Words without a loaded vector keep their random initialisation.
    embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words + 1, embed_size))
    for word, i in word_index.items():
        # Indices above nb_words would fall outside the matrix, so skip them
        # rather than raise IndexError. Row nb_words itself is skipped as in
        # the original code.
        # NOTE(review): skipping i == nb_words leaves the last row random;
        # confirm whether that is intended.
        if i >= max_features or i >= nb_words:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment