Skip to content

Instantly share code, notes, and snippets.

View mbednarski's full-sized avatar

Mateusz Bednarski mbednarski

View GitHub Profile
import os
from sklearn.metrics import classification_report
from mlp import DumbModel, Dataset
def train_model(dataset_dir, model_file, vocab_size):
    """Train a classifier from the review data under *dataset_dir*.

    NOTE(review): the function body is truncated in this view — only the
    logging and the train-path computation are visible; the actual
    training presumably follows in the full file.

    Args:
        dataset_dir: root directory; training data is expected under
            ``<dataset_dir>/train``.
        model_file: destination for the trained model (not used in the
            visible portion — presumably consumed further down).
        vocab_size: vocabulary size (only logged in the visible portion).
    """
    print(f'Training model from directory {dataset_dir}')
    print(f'Vocabulary size: {vocab_size}')
    # Training examples live in the 'train' subdirectory.
    train_dir = os.path.join(dataset_dir, 'train')
from setuptools import setup

# Minimal packaging script for the 'mlp' package.
# NOTE(review): '0.0.1dev1' is a pre-PEP-440 version spelling; the
# normalized form is '0.0.1.dev1' (setuptools normalizes it, but the
# canonical form avoids warnings) — confirm before changing.
setup(name='mlp',
      packages=['mlp'],
      version='0.0.1dev1',
      )
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
class DumbModel:
    """Baseline text classifier.

    Presumably a TF-IDF + MultinomialNB pipeline, given the imports
    above — confirm against the full file. The body is elided in this
    view ('# (...)'); only the constructor is visible.
    """

    def __init__(self, vocab_size=10_000):
        # Maximum vocabulary size for the vectorizer.
        self.vocab_size = vocab_size
        # (...)
class Dataset:
    """Handle on the aclImdb train/test directories.

    NOTE(review): the body is elided here ('# (...)'), and `Path` is used
    without a visible import — presumably `from pathlib import Path`
    appears elsewhere in the full file.
    """

    def __init__(self, train_dir='data/raw/aclImdb/train', test_dir='data/raw/aclImdb/test'):
        # Roots of the training and test review trees.
        self.train_dir = Path(train_dir)
        self.test_dir = Path(test_dir)
        # (...)
import pickle


def _unpickle(path):
    # Deserialize one artifact produced by the training step.
    # NOTE(review): pickle.load executes arbitrary code from the file —
    # only load artifacts you produced yourself.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Restore the fitted vectorizer and classifier from disk.
tfidf = _unpickle('tfidf.pickle')
clf = _unpickle('model.pickle')

# Classify a single phrase typed by the user.
x = input("Please enter your phrase: ")
y = clf.predict_proba(tfidf.transform([x]))
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
from dataset import Dataset
# Materialize the project's Dataset wrapper with its default directories.
dset = Dataset()
from pathlib import Path
class Dataset:
    """Loader for the aclImdb review directories.

    NOTE(review): this duplicates the parameterized Dataset earlier in
    this paste, with the paths hard-coded instead.
    """

    def __init__(self):
        # Fixed locations of the train/test review trees.
        self.train_dir = Path('data/raw/aclImdb/train')
        self.test_dir = Path('data/raw/aclImdb/test')

    # NOTE(review): SyntaxError — the non-default parameter 'directory'
    # follows the default 'limit=None'; the signature should be
    # `def _get_set(self, directory, limit=None)`. The body is truncated
    # in this view, so only the signature fix is flagged here.
    def _get_set(self, limit=None, directory):
        x = []
        y = []
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Dimensionality of the learned word embeddings.
embedding_dims = 5

# Randomly initialized weight matrices for the two-layer embedding model:
# W1 projects a one-hot word into the embedding space, W2 projects back
# to vocabulary scores.
# NOTE(review): `torch.autograd.Variable` has been deprecated since
# PyTorch 0.4 — a plain tensor with requires_grad=True is equivalent.
# `Variable` and `vocabulary_size` are defined earlier in the full file;
# they are not visible in this fragment.
W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(), requires_grad=True)
W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(), requires_grad=True)

# Training hyperparameters.
num_epochs = 100
learning_rate = 0.001
for epo in range(num_epochs):
loss_val = 0
for data, target in idx_pairs:
x = Variable(get_input_layer(data)).float()