MLWhiz · February 9, 2019 08:07
diff --git a/glove_embedding.py b/glove_embedding.py
 # load the GloVe vectors in a dictionary:
 def load_glove_index(
    embedding_file='../input/embeddings/glove.840B.300d/glove.840B.300d.txt',
    dim=300,
 ):
    """Load word vectors from a GloVe-style text file into a dictionary.

    Each line is read as ``word v1 v2 ...`` with fields separated by single
    spaces. Lines that carry no vector values (e.g. blank lines) are skipped.
    If a word occurs more than once, the last occurrence wins.

    Args:
        embedding_file: Path of the embedding text file to read.
        dim: Maximum number of leading vector components kept per word.

    Returns:
        dict mapping each word to a 1-D ``float32`` array of at most ``dim``
        values.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a vector field cannot be parsed as a float.
    """
    embeddings_index = {}
    # The context manager closes the handle deterministically; the explicit
    # encoding keeps decoding independent of the platform default.
    with open(embedding_file, encoding='utf-8') as handle:
        for line in handle:
            word, *coefs = line.rstrip('\n').split(' ')
            if not coefs:
                # Nothing after the first field: not a usable vector line.
                continue
            embeddings_index[word] = np.asarray(coefs, dtype='float32')[:dim]
    return embeddings_index

 # Build the word -> vector table once at module level; sent2vec looks
 # words up in this global.
 embeddings_index = load_glove_index()

 print('Found %s word vectors.' % (len(embeddings_index),))

 from nltk.corpus import stopwords
 # Stop-word collection used by sent2vec to drop tokens before embedding.
 stop_words = stopwords.words('english')
 def sent2vec(s):
    """Return a unit-length sentence vector built from word embeddings.

    The input is converted with ``str``, lower-cased and tokenized with
    ``word_tokenize``. Tokens found in ``stop_words`` and tokens that are not
    purely alphabetic are dropped; the vectors of the remaining tokens that
    exist in ``embeddings_index`` are summed and divided by the sum's
    Euclidean norm.

    Args:
        s: Text to embed; any object is accepted and passed through ``str``.

    Returns:
        The normalized sum of the token vectors, or ``np.zeros(300)`` when no
        token has an embedding or the vectors sum to zero.
    """
    tokens = word_tokenize(str(s).lower())
    vectors = []
    for token in tokens:
        if token in stop_words or not token.isalpha():
            continue
        # Out-of-vocabulary tokens are skipped; any other failure now
        # propagates instead of being silently swallowed.
        vector = embeddings_index.get(token)
        if vector is not None:
            vectors.append(vector)
    if not vectors:
        return np.zeros(300)
    total = np.array(vectors).sum(axis=0)
    norm = np.sqrt((total ** 2).sum())
    if norm == 0:
        # A zero sum would make the division yield NaNs.
        return np.zeros(300)
    return total / norm

 # Build the GloVe feature arrays: one sent2vec result per entry of the
 # cleaned_text values, train first and then test, iterating through tqdm.
 xtrain_glove = np.array(list(map(sent2vec, tqdm(train_df.cleaned_text.values))))
 xtest_glove = np.array(list(map(sent2vec, tqdm(test_df.cleaned_text.values))))
	# load the GloVe vectors in a dictionary:
	def load_glove_index(
	    embedding_file='../input/embeddings/glove.840B.300d/glove.840B.300d.txt',
	    dim=300,
	):
	    """Load word vectors from a GloVe-style text file into a dictionary.

	    Each line is read as ``word v1 v2 ...`` with fields separated by
	    single spaces. Lines that carry no vector values (e.g. blank lines)
	    are skipped. If a word occurs more than once, the last one wins.

	    Args:
	        embedding_file: Path of the embedding text file to read.
	        dim: Maximum number of leading vector components kept per word.

	    Returns:
	        dict mapping each word to a 1-D ``float32`` array of at most
	        ``dim`` values.

	    Raises:
	        OSError: If the file cannot be opened.
	        ValueError: If a vector field cannot be parsed as a float.
	    """
	    embeddings_index = {}
	    # The context manager closes the handle deterministically; the
	    # explicit encoding keeps decoding independent of platform defaults.
	    with open(embedding_file, encoding='utf-8') as handle:
	        for line in handle:
	            word, *coefs = line.rstrip('\n').split(' ')
	            if not coefs:
	                # Nothing after the first field: not a usable vector line.
	                continue
	            embeddings_index[word] = np.asarray(coefs, dtype='float32')[:dim]
	    return embeddings_index

	# Build the word -> vector table once at module level; sent2vec looks
	# words up in this global.
	embeddings_index = load_glove_index()

	print('Found %s word vectors.' % (len(embeddings_index),))

	from nltk.corpus import stopwords
	# Stop-word collection used by sent2vec to drop tokens before embedding.
	stop_words = stopwords.words('english')
	def sent2vec(s):
	    """Return a unit-length sentence vector built from word embeddings.

	    The input is converted with ``str``, lower-cased and tokenized with
	    ``word_tokenize``. Tokens found in ``stop_words`` and tokens that are
	    not purely alphabetic are dropped; the vectors of the remaining
	    tokens that exist in ``embeddings_index`` are summed and divided by
	    the sum's Euclidean norm.

	    Args:
	        s: Text to embed; any object is accepted and passed through
	            ``str``.

	    Returns:
	        The normalized sum of the token vectors, or ``np.zeros(300)``
	        when no token has an embedding or the vectors sum to zero.
	    """
	    tokens = word_tokenize(str(s).lower())
	    vectors = []
	    for token in tokens:
	        if token in stop_words or not token.isalpha():
	            continue
	        # Out-of-vocabulary tokens are skipped; any other failure now
	        # propagates instead of being silently swallowed.
	        vector = embeddings_index.get(token)
	        if vector is not None:
	            vectors.append(vector)
	    if not vectors:
	        return np.zeros(300)
	    total = np.array(vectors).sum(axis=0)
	    norm = np.sqrt((total ** 2).sum())
	    if norm == 0:
	        # A zero sum would make the division yield NaNs.
	        return np.zeros(300)
	    return total / norm

	# Build the GloVe feature arrays: one sent2vec result per entry of the
	# cleaned_text values, train first and then test, iterating through tqdm.
	xtrain_glove = np.array(list(map(sent2vec, tqdm(train_df.cleaned_text.values))))
	xtest_glove = np.array(list(map(sent2vec, tqdm(test_df.cleaned_text.values))))
No results found