MLWhiz · January 18, 2019 05:55
diff --git a/create_glove_3.py b/create_glove_3.py
 def create_glove(word_index,embeddings_index):
    """Build an embedding matrix of pretrained vectors plus sentiment features.

    Each row is laid out as ``[pretrained vector | polarity | subjectivity | 0 | 0]``.
    Rows are first drawn from a normal distribution (hard-coded mean/std), so a
    word without a pretrained vector keeps a random vector part and only gets
    its extra-feature columns overwritten.

    Args:
        word_index: mapping ``word -> integer row index``.
            NOTE(review): presumably a Keras ``Tokenizer.word_index`` (1-based)
            -- confirm against the caller.
        embeddings_index: mapping ``word -> 1-D vector``; all vectors are
            expected to have the same length.

    Returns:
        ``numpy.ndarray`` of shape ``(nb_words, embed_size + 4)`` where
        ``nb_words = min(max_features, len(word_index))``.

    Raises:
        ValueError: if ``embeddings_index`` is empty.

    Relies on the module-level names ``max_features``, ``np``, ``tqdm`` and
    ``TextBlob``.
    """
    # The matrix has always reserved 4 extra columns, but only 2 features are
    # computed. The original tried to write 2 values into 4 slots, which raises
    # a broadcast ValueError. Keep the output width and zero-fill the unused slots.
    n_extra = 4
    emb_mean,emb_std = -0.005838499,0.48782197

    # Only the vector width is needed; stacking every vector copied the whole
    # table (and np.stack rejects a dict view on current NumPy).
    try:
        sample_vector = next(iter(embeddings_index.values()))
    except StopIteration:
        raise ValueError("embeddings_index is empty") from None
    embed_size = len(sample_vector)

    nb_words = min(max_features, len(word_index))
    embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size + n_extra))

    # Count real hits instead of starting at nb_words and subtracting misses,
    # which reported never-visited rows as found.
    count_found = 0
    for word, i in tqdm(word_index.items()):
        # Compare with the row count, not max_features: when the vocabulary is
        # smaller than max_features an index equal to len(word_index) would
        # otherwise fall outside the matrix.
        if i >= nb_words:
            continue

        word_sent = TextBlob(word).sentiment
        # Extra information we are passing to our embeddings
        extra_embed = np.zeros(n_extra)
        extra_embed[:2] = (word_sent.polarity, word_sent.subjectivity)

        embedding_vector = embeddings_index.get(word)
        if embedding_vector is None and word.islower():
            # Fall back to the capitalized spelling for lowercase words.
            embedding_vector = embeddings_index.get(word.capitalize())

        if embedding_vector is not None:
            embedding_matrix[i] = np.append(embedding_vector, extra_embed)
            count_found += 1
        else:
            # Keep the random vector part; only set the extra-feature columns.
            embedding_matrix[i, embed_size:] = extra_embed
    print("Got embedding for ",count_found," words.")
    return embedding_matrix
	def create_glove(word_index,embeddings_index):
		"""Build an embedding matrix of pretrained vectors plus sentiment features.

		Each row is laid out as ``[pretrained vector | polarity | subjectivity | 0 | 0]``.
		Rows are first drawn from a normal distribution (hard-coded mean/std), so a
		word without a pretrained vector keeps a random vector part and only gets
		its extra-feature columns overwritten.

		Args:
			word_index: mapping ``word -> integer row index``.
				NOTE(review): presumably a Keras ``Tokenizer.word_index`` (1-based)
				-- confirm against the caller.
			embeddings_index: mapping ``word -> 1-D vector``; all vectors are
				expected to have the same length.

		Returns:
			``numpy.ndarray`` of shape ``(nb_words, embed_size + 4)`` where
			``nb_words = min(max_features, len(word_index))``.

		Raises:
			ValueError: if ``embeddings_index`` is empty.

		Relies on the module-level names ``max_features``, ``np``, ``tqdm`` and
		``TextBlob``.
		"""
		# The matrix has always reserved 4 extra columns, but only 2 features are
		# computed. The original tried to write 2 values into 4 slots, which raises
		# a broadcast ValueError. Keep the output width and zero-fill the unused slots.
		n_extra = 4
		emb_mean,emb_std = -0.005838499,0.48782197

		# Only the vector width is needed; stacking every vector copied the whole
		# table (and np.stack rejects a dict view on current NumPy).
		try:
			sample_vector = next(iter(embeddings_index.values()))
		except StopIteration:
			raise ValueError("embeddings_index is empty") from None
		embed_size = len(sample_vector)

		nb_words = min(max_features, len(word_index))
		embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size + n_extra))

		# Count real hits instead of starting at nb_words and subtracting misses,
		# which reported never-visited rows as found.
		count_found = 0
		for word, i in tqdm(word_index.items()):
			# Compare with the row count, not max_features: when the vocabulary is
			# smaller than max_features an index equal to len(word_index) would
			# otherwise fall outside the matrix.
			if i >= nb_words:
				continue

			word_sent = TextBlob(word).sentiment
			# Extra information we are passing to our embeddings
			extra_embed = np.zeros(n_extra)
			extra_embed[:2] = (word_sent.polarity, word_sent.subjectivity)

			embedding_vector = embeddings_index.get(word)
			if embedding_vector is None and word.islower():
				# Fall back to the capitalized spelling for lowercase words.
				embedding_vector = embeddings_index.get(word.capitalize())

			if embedding_vector is not None:
				embedding_matrix[i] = np.append(embedding_vector, extra_embed)
				count_found += 1
			else:
				# Keep the random vector part; only set the extra-feature columns.
				embedding_matrix[i, embed_size:] = extra_embed
		print("Got embedding for ",count_found," words.")
		return embedding_matrix