from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

# Train a bag-of-words logistic-regression classifier on the augmented
# training set.
# NOTE(review): df_train_augmented is defined outside this snippet; presumably
# a pandas DataFrame with `text` and `label` columns -- confirm against caller.
train_text = df_train_augmented.text.tolist()

# Keep a reference to the fitted vectorizer instead of discarding it: the
# vocabulary learned here is required to encode any later text (validation,
# test, inference) via `vectorizer.transform(...)` so that it lines up with
# the columns of X_train.
vectorizer = CountVectorizer(ngram_range=(1, 2))  # unigram + bigram counts
X_train = vectorizer.fit_transform(train_text)

clf = LogisticRegression(solver="lbfgs")
clf.fit(X=X_train, y=df_train_augmented.label.values)