rohithteja · August 23, 2021 13:21
diff --git a/vectorization.py b/vectorization.py
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.model_selection import train_test_split

 # Vectorization: turn the raw text column into a token-count matrix.
 # lowercase=False keeps the original casing, so "Word" and "word" are
 # counted as different tokens.
 # NOTE(review): the vocabulary is fitted on every row of df before the
 # split below, so validation/test text also shapes it - confirm intended.
 cv = CountVectorizer(lowercase=False)
 text_vector = cv.fit_transform(df.text.values)

 # Labels are read from the last column of df; features are the count matrix.
 y = df.iloc[:, -1].values
 x = text_vector

 # Train / validation / test split.
 # Step 1: hold out 20% of the rows, stratified on the labels.
 x_train, xtest, y_train, ytest = train_test_split(
     x, y, test_size=0.20, stratify=y, random_state=42
 )

 # Step 2: cut the held-out 20% in half (stratified again) to obtain the
 # validation and test sets, i.e. roughly 80/10/10 overall.
 x_val, x_test, y_val, y_test = train_test_split(
     xtest, ytest, test_size=0.5, stratify=ytest, random_state=42
 )
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.model_selection import train_test_split

	# Build bag-of-words features from the text column. Casing is preserved
	# (lowercase=False), so differently-cased spellings stay separate tokens.
	# NOTE(review): fit_transform sees the whole of df.text before any
	# splitting happens, so held-out rows contribute to the vocabulary -
	# confirm this is acceptable for the evaluation.
	cv = CountVectorizer(lowercase=False)
	text_vector = cv.fit_transform(df.text.values)

	# Targets come from the last column of df; x is the count matrix.
	y = df.iloc[:, -1].values
	x = text_vector

	# First split: 80% training rows, 20% held out, stratified on y.
	x_train, xtest, y_train, ytest = train_test_split(
	    x, y, test_size=0.20, stratify=y, random_state=42
	)

	# Second split: divide the held-out rows evenly into validation and
	# test sets, stratified on the held-out labels.
	x_val, x_test, y_val, y_test = train_test_split(
	    xtest, ytest, test_size=0.5, stratify=ytest, random_state=42
	)
No results found