Skip to content

Instantly share code, notes, and snippets.

View mbednarski's full-sized avatar

Mateusz Bednarski mbednarski

View GitHub Profile
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from pathlib import Path
class Dataset:
    """Hold the on-disk locations of the train and test data directories.

    Both locations are fixed relative paths under ``data/raw/aclImdb`` and
    are stored as ``pathlib.Path`` objects.
    """

    def __init__(self):
        self.train_dir = Path('data/raw/aclImdb/train')
        self.test_dir = Path('data/raw/aclImdb/test')

    def _get_set(self, limit=None, directory=None):
        """Start collecting samples (``x``) and labels (``y``) from *directory*.

        The original signature, ``(self, limit=None, directory)``, placed a
        required parameter after a defaulted one, which Python rejects with a
        SyntaxError.  The parameter order and names are kept; ``directory``
        now has a ``None`` default that is checked explicitly so it is still
        effectively required.

        Args:
            limit: Optional cap; ``None`` by default.  How it is applied is
                not visible in this excerpt.
            directory: Directory to read from.  Must be provided.

        Raises:
            TypeError: If ``directory`` is not supplied.
        """
        if directory is None:
            raise TypeError("_get_set() missing required argument: 'directory'")
        x = []
        y = []
        # NOTE(review): the remainder of this method is cut off in the
        # available excerpt; only the accumulator setup is shown.
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
from dataset import Dataset
# Instantiate Dataset (imported from the local ``dataset`` module) with no
# constructor arguments.
dset = Dataset()
import pickle
# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only load 'tfidf.pickle' and 'model.pickle' from a trusted source.

# Restore the object stored in 'tfidf.pickle'; it must provide ``transform``
# (presumably a fitted TfidfVectorizer -- confirm against the training script).
with open('tfidf.pickle', 'rb') as f:
    tfidf = pickle.load(f)

# Restore the object stored in 'model.pickle'; it must provide
# ``predict_proba`` (presumably a trained classifier -- confirm).
with open('model.pickle', 'rb') as f:
    clf = pickle.load(f)

# Read one phrase from stdin and score it.  The phrase is wrapped in a
# one-element list, so ``transform`` receives a single-item batch.
x = input("Please enter your phrase: ")
y = clf.predict_proba(tfidf.transform([x]))
# (...)
class Dataset:
    """Hold the train and test directory locations as ``pathlib.Path`` objects.

    Args:
        train_dir: Location of the training directory; passed to ``Path``.
            Defaults to ``'data/raw/aclImdb/train'``.
        test_dir: Location of the test directory; passed to ``Path``.
            Defaults to ``'data/raw/aclImdb/test'``.
    """

    def __init__(self, train_dir='data/raw/aclImdb/train', test_dir='data/raw/aclImdb/test'):
        # Normalise both arguments to Path so str and Path inputs are
        # stored uniformly.
        self.train_dir = Path(train_dir)
        self.test_dir = Path(test_dir)
# (...)
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
class DumbModel:
    """Model wrapper configured by a vocabulary size.

    Args:
        vocab_size: Vocabulary size to record on the instance.  Defaults to
            10,000.  How it is consumed is not visible in this excerpt.
    """

    def __init__(self, vocab_size=10_000):
        self.vocab_size = vocab_size
        # NOTE(review): the rest of the class is cut off in the available
        # excerpt; only the constructor's first assignment is shown.
from setuptools import setup
# Packaging metadata: distribution name, version string, and the single
# package directory to include.
setup(
    name='mlp',
    version='0.0.1dev1',
    packages=['mlp'],
)
import os
from sklearn.metrics import classification_report
from mlp import DumbModel, Dataset
def train_model(dataset_dir, model_file, vocab_size):
    """Announce a training run and resolve the training-data directory.

    Args:
        dataset_dir: Root directory of the dataset; its ``train``
            subdirectory is used as the training directory.
        model_file: Destination for the model.  Not used in the visible
            part of the function.
        vocab_size: Vocabulary size; echoed to stdout here.
    """
    print(f'Training model from directory {dataset_dir}')
    print(f'Vocabulary size: {vocab_size}')

    train_dir = os.path.join(dataset_dir, 'train')
    # NOTE(review): the remainder of this function is cut off in the
    # available excerpt; ``train_dir`` and ``model_file`` are presumably
    # used by the missing part.
"""MLP - machine-learning-production
Usage:
mlp.py train <dataset-dir> <model-file> [--vocab-size=<vocab-size>]
mlp.py ask <model-file> <question>
mlp.py (-h | --help)
Arguments:
<dataset-dir> Directory with dataset.
<model-file> Serialized model file.