Skip to content

Instantly share code, notes, and snippets.

@mbednarski
Last active November 8, 2018 18:13
Show Gist options
  • Save mbednarski/cc9b6f8d8e945fba2e32ac949bb2fa1f to your computer and use it in GitHub Desktop.
Save mbednarski/cc9b6f8d8e945fba2e32ac949bb2fa1f to your computer and use it in GitHub Desktop.
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
class DumbModel:
    """Baseline text classifier: TF-IDF features fed to multinomial naive Bayes.

    The model is unusable until :meth:`train` has been called (or a trained
    model has been loaded with :meth:`deserialize`); until then ``vectorizer``
    and ``clf`` are ``None``.
    """

    def __init__(self, vocab_size=10_000):
        """Create an untrained model.

        Args:
            vocab_size: Passed to ``TfidfVectorizer`` as ``max_features``
                when :meth:`train` is called.
        """
        self.vocab_size = vocab_size
        self.clf = None
        self.vectorizer = None

    def _require_trained(self):
        """Raise a clear error if the model has not been trained or loaded."""
        if self.vectorizer is None or self.clf is None:
            # Imported lazily so the class can be defined without touching
            # sklearn.exceptions. NotFittedError subclasses both ValueError
            # and AttributeError, so code that caught the AttributeError the
            # previous implementation produced here keeps working.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                f"{type(self).__name__} is not trained yet; call train() "
                "or load a trained model with deserialize() first."
            )

    def train(self, X_train, y_train):
        """Fit the vectorizer and the classifier from scratch.

        Any previously fitted vectorizer/classifier on this instance is
        replaced.

        Args:
            X_train: Training documents, in whatever form
                ``TfidfVectorizer.fit_transform`` accepts.
            y_train: Target labels, one per training document.
        """
        self.vectorizer = TfidfVectorizer(max_features=self.vocab_size)
        features = self.vectorizer.fit_transform(X_train)
        self.clf = MultinomialNB()
        self.clf.fit(features, y_train)

    def predict_proba(self, X):
        """Return class probability estimates for the documents in ``X``.

        Args:
            X: Documents to score, in whatever form
                ``TfidfVectorizer.transform`` accepts.

        Returns:
            The result of ``MultinomialNB.predict_proba`` on the vectorized
            documents.

        Raises:
            sklearn.exceptions.NotFittedError: If the model is untrained.
        """
        self._require_trained()
        features = self.vectorizer.transform(X)
        return self.clf.predict_proba(features)

    def predict(self, X):
        """Return the predicted label for each document in ``X``.

        Args:
            X: Documents to classify, in whatever form
                ``TfidfVectorizer.transform`` accepts.

        Returns:
            The result of ``MultinomialNB.predict`` on the vectorized
            documents.

        Raises:
            sklearn.exceptions.NotFittedError: If the model is untrained.
        """
        self._require_trained()
        features = self.vectorizer.transform(X)
        return self.clf.predict(features)

    def serialize(self, fname):
        """Write the model to ``fname``.

        The file holds three consecutive pickles, in this order:
        ``vocab_size``, ``vectorizer``, ``clf``. :meth:`deserialize` relies
        on that order. An untrained model can be serialized; its vectorizer
        and classifier are stored as ``None``.

        Args:
            fname: Path of the file to create or overwrite.
        """
        with open(fname, 'wb') as f:
            for part in (self.vocab_size, self.vectorizer, self.clf):
                pickle.dump(part, f)

    @classmethod
    def deserialize(cls, fname):
        """Load a model previously written by :meth:`serialize`.

        Called on a subclass, this returns an instance of that subclass.

        SECURITY: the file is read with ``pickle``, which can execute
        arbitrary code while loading. Only call this on files from a
        trusted source.

        Args:
            fname: Path of the file to read.

        Returns:
            A new instance of ``cls`` populated from the file.
        """
        model = cls()
        with open(fname, 'rb') as f:
            # NOTE(security): pickle.load on untrusted data is unsafe.
            # Read order must mirror the write order used by serialize().
            model.vocab_size = pickle.load(f)
            model.vectorizer = pickle.load(f)
            model.clf = pickle.load(f)
        return model
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment