This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
from sklearn.metrics import classification_report
from mlp import DumbModel, Dataset


def train_model(dataset_dir, model_file, vocab_size):
    """Train a sentiment model from the data under *dataset_dir*.

    NOTE(review): the function body continues past this chunk — only the
    logging and train-path setup are visible here.

    dataset_dir: root directory containing a 'train' subdirectory.
    model_file: destination for the serialized model (presumably a path —
        confirm against the rest of the function).
    vocab_size: vocabulary size forwarded to the model/vectorizer.
    """
    print(f'Training model from directory {dataset_dir}')
    print(f'Vocabulary size: {vocab_size}')
    # Training split lives in <dataset_dir>/train (IMDb aclImdb layout).
    train_dir = os.path.join(dataset_dir, 'train')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from setuptools import setup

# Minimal packaging configuration for the `mlp` package.
setup(
    name='mlp',
    packages=['mlp'],
    # PEP 440 canonical form for a development release is "X.Y.Z.devN".
    # The original "0.0.1dev1" is a non-normalized version string that
    # modern setuptools/pip warn about (and will eventually reject).
    version='0.0.1.dev1',
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pickle | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.naive_bayes import MultinomialNB | |
| from sklearn.metrics import classification_report | |
class DumbModel:
    """Baseline text classifier (class body truncated in this chunk).

    NOTE(review): the surrounding imports (TfidfVectorizer, MultinomialNB)
    suggest a TF-IDF + naive-Bayes pipeline — confirm against the full
    class definition, which is not visible here.
    """

    def __init__(self, vocab_size=10_000):
        # Maximum vocabulary size, presumably forwarded to the
        # vectorizer's max_features — verify in the training code.
        self.vocab_size = vocab_size
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # (...) | |
| class Dataset: | |
| def __init__(self, train_dir='data/raw/aclImdb/train', test_dir='data/raw/aclImdb/test'): | |
| self.train_dir = Path(train_dir) | |
| self.test_dir = Path(test_dir) | |
| # (...) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle

# Load the fitted TF-IDF vectorizer and classifier serialized by training.
# NOTE(review): pickle.load executes arbitrary code from the file — these
# are assumed to be locally produced artifacts; confirm provenance before
# loading anything untrusted.
with open('tfidf.pickle', 'rb') as f:
    tfidf = pickle.load(f)
with open('model.pickle', 'rb') as f:
    clf = pickle.load(f)

# Interactive single-phrase scoring: vectorize the phrase and compute class
# probabilities. The result `y` is presumably reported further down — the
# continuation is not visible in this chunk.
x = input("Please enter your phrase: ")
y = clf.predict_proba(tfidf.transform([x]))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

from dataset import Dataset

# Instantiate the dataset wrapper with its defaults — presumably the
# Dataset class from dataset.py that points at data/raw/aclImdb/{train,test};
# confirm against that module. Script continues past this chunk.
dset = Dataset()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pathlib import Path | |
class Dataset:
    """Locates the IMDb review splits on disk (rest of class follows)."""

    def __init__(self):
        """Point at the default aclImdb train/test directories."""
        base = Path('data/raw/aclImdb')
        self.train_dir = base / 'train'
        self.test_dir = base / 'test'
| def _get_set(self, limit=None, directory): | |
| x = [] | |
| y = [] |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Skip-gram-style embedding training fragment (inner loop body continues
# past this chunk).
embedding_dims = 5
# Two trainable weight matrices: W1 projects a vocabulary-sized input onto
# the embedding space; W2 projects embeddings back to vocabulary logits.
# NOTE(review): torch.autograd.Variable is deprecated — a plain tensor with
# requires_grad=True behaves identically on modern PyTorch; confirm the
# project's torch version before modernizing.
W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(), requires_grad=True)
W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(), requires_grad=True)
num_epochs = 100
learning_rate = 0.001
for epo in range(num_epochs):
    loss_val = 0  # running loss accumulated over the epoch
    for data, target in idx_pairs:
        # Input vector for the center word — presumably one-hot; confirm
        # against get_input_layer's definition (not visible here).
        x = Variable(get_input_layer(data)).float()