Skip to content

Instantly share code, notes, and snippets.

@mbednarski
Created November 7, 2018 22:00
Show Gist options
  • Save mbednarski/ae8a2d3b9c2436534551cae2a09bdda7 to your computer and use it in GitHub Desktop.
Save mbednarski/ae8a2d3b9c2436534551cae2a09bdda7 to your computer and use it in GitHub Desktop.
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
from dataset import Dataset
# Train a TF-IDF + multinomial Naive Bayes classifier, pickle the fitted
# vectorizer and model to disk, and print a classification report for the
# test split.

# NOTE(review): Dataset comes from the project-local `dataset` module; the
# getters presumably return (documents, labels) pairs capped at `limit`
# samples -- confirm against that module.
dset = Dataset()
X_train, y_train = dset.get_train_set(limit=10000)
X_test, y_test = dset.get_test_set(limit=10000)

# The vectorizer constructor takes configuration only; the corpus is supplied
# to fit_transform(). Passing X_train positionally bound it to the `input`
# parameter, which is keyword-only in current scikit-learn releases.
tfidf = TfidfVectorizer(max_features=10000)
X_train = tfidf.fit_transform(X_train)

# Persist the fitted vectorizer so the same vocabulary and IDF weights can be
# reused later without refitting.
with open('tfidf.pickle', 'wb') as f:
    pickle.dump(tfidf, f)

clf = MultinomialNB()
clf.fit(X_train, y_train)

# Persist the trained classifier alongside the vectorizer.
with open('model.pickle', 'wb') as f:
    pickle.dump(clf, f)

# Use transform() (not fit_transform()) on the test set so it is projected
# into the feature space learned from the training data.
X_test = tfidf.transform(X_test)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment