Created
September 4, 2020 14:30
-
-
Save pranjalAI/eeca564bb8b3f489cc4572e54354d43d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def emb_mat(nb_words):
    """Build an embedding matrix initialised from pre-trained GloVe vectors.

    Every row is first drawn from a normal distribution whose mean and
    standard deviation are those of all values in the GloVe file; rows whose
    word has a GloVe vector are then overwritten with that vector.

    Besides ``nb_words`` the function reads three names from the enclosing
    (module) scope: ``word_index`` (iterated as ``word -> row index``
    pairs), ``max_features`` and ``embed_size``.

    Args:
        nb_words: The matrix is allocated with ``nb_words + 1`` rows.

    Returns:
        A NumPy array of shape ``(nb_words + 1, embed_size)``.
    """
    EMBEDDING_FILE = "glove.6B.100d.txt"

    def get_coefs(word, *arr):
        # One line of the file: the token followed by its vector components.
        return word, np.asarray(arr, dtype='float32')

    # Use a context manager so the file handle is closed once parsing is done.
    with open(EMBEDDING_FILE, encoding="utf8") as glove_file:
        embeddings_index = dict(
            get_coefs(*line.strip().split()) for line in glove_file
        )

    # np.stack needs a real sequence; a dict view is rejected by current NumPy.
    all_embs = np.stack(list(embeddings_index.values()))
    emb_mean, emb_std = all_embs.mean(), all_embs.std()

    # Random initialisation matching the GloVe value distribution, so words
    # without a pre-trained vector still get similarly scaled values.
    embedding_matrix = np.random.normal(
        emb_mean, emb_std, (nb_words + 1, embed_size)
    )

    for word, i in word_index.items():
        # Skip indices beyond the vocabulary cap, and the row at nb_words.
        if i >= max_features or i == nb_words:
            continue
        # Look up the pre-trained GloVe vector for this word, if there is one.
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment