# Instantiate the model w/ hyperparams
vocab_size = len(vocab_to_int) + 1  # +1 for the 0 padding token
output_size = 1
embedding_dim = 200
hidden_dim = 256
n_layers = 2

net = SentimentRNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
import torch.nn as nn

class SentimentRNN(nn.Module):
    """
    The RNN model that will be used to perform sentiment analysis.
    """
    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        """
        Initialize the model by setting up the layers.
        """
        super().__init__()
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        # embedding -> LSTM -> dropout -> linear -> sigmoid
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()
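    # The gist cuts off after the layer setup; below is a minimal sketch of the
    # rest of the class (forward pass and hidden-state init), assuming the layer
    # names defined in __init__ above.
    def forward(self, x, hidden):
        """Forward pass: embedding -> LSTM -> dropout -> fc -> sigmoid."""
        batch_size = x.size(0)
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        out = self.sig(self.fc(self.dropout(lstm_out)))
        # keep only the sigmoid output for the last time step of each sequence
        out = out.view(batch_size, -1)[:, -1]
        return out, hidden

    def init_hidden(self, batch_size):
        """Create two new zeroed tensors for the LSTM hidden and cell state."""
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
        return hidden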
import torch
from torch.utils.data import TensorDataset, DataLoader

# create Tensor datasets
train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
valid_data = TensorDataset(torch.from_numpy(valid_x), torch.from_numpy(valid_y))
test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))

# dataloaders
batch_size = 50

train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)
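# A quick sanity check on one batch; the printed sizes are illustrative and
# assume the batch_size above and whatever seq_length was used for padding.
dataiter = iter(train_loader)
sample_x, sample_y = next(dataiter)
print('Sample input size: ', sample_x.size())  # torch.Size([50, seq_length])
print('Sample label size: ', sample_y.size())  # torch.Size([50])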
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20

split_frac = 0.8

## split data into training, validation, and test data (features and labels, x and y)
train_x, test_x, train_y, test_y = train_test_split(features, encoded_labels, test_size=1 - split_frac)
## split the held-out 20% evenly into validation and test sets
test_x, valid_x, test_y, valid_y = train_test_split(test_x, test_y, test_size=0.5)

## print out the shapes of your resultant feature data
print(train_x.shape, test_x.shape, valid_x.shape)
import numpy as np

def pad_features(reviews_ints, seq_length):
    ''' Return features of reviews_ints, where each review is padded with 0's
        or truncated to the input seq_length.
    '''
    features = []
    ## implement function
    for review in reviews_ints:
        if len(review) < seq_length:
            # left-pad short reviews with zeros
            features.append(list(np.zeros(seq_length - len(review), dtype=int)) + review)
        else:
            # truncate long reviews to the first seq_length tokens
            features.append(review[:seq_length])
    return np.array(features, dtype=int)
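# Illustrative usage with made-up token ids: a short review is left-padded
# and a long one is truncated to seq_length.
sample = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
print(pad_features(sample, seq_length=5))
# [[0 0 1 2 3]
#  [4 5 6 7 8]]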
# feel free to use this import
from collections import Counter

temp = Counter(words)
temp = temp.most_common()

## Build a dictionary that maps words to integers
vocab_to_int = {}
i = 1  # start at 1 so that 0 stays reserved for padding
for pair in temp:
    vocab_to_int[pair[0]] = i
    i += 1
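# With the mapping built, encoding text is one lookup per token. A hedged
# sketch, assuming reviews_split holds the reviews as whitespace-separated strings:
reviews_ints = []
for review in reviews_split:
    reviews_ints.append([vocab_to_int[word] for word in review.split()])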
class NegativeSamplingLoss(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input_vectors, output_vectors, noise_vectors):
        batch_size, embed_size = input_vectors.shape
        # Input vectors should be a batch of column vectors
        input_vectors = input_vectors.view(batch_size, embed_size, 1)
        # Output vectors should be a batch of row vectors
        output_vectors = output_vectors.view(batch_size, 1, embed_size)
        # log-sigmoid loss for the true pairs, and for the negated noise vectors
        out_loss = torch.bmm(output_vectors, input_vectors).sigmoid().log().squeeze()
        noise_loss = torch.bmm(noise_vectors.neg(), input_vectors).sigmoid().log().squeeze().sum(1)
        # negate and average over the batch
        return -(out_loss + noise_loss).mean()
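# A quick shape check with random tensors (the sizes here are made up):
criterion = NegativeSamplingLoss()
inp, out = torch.randn(16, 300), torch.randn(16, 300)
noise = torch.randn(16, 5, 300)  # 5 noise words per example
print(criterion(inp, out, noise))  # scalar loss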
class SkipGramNeg(nn.Module):
    def __init__(self, n_vocab, n_embed, noise_dist=None):
        super().__init__()

        self.n_vocab = n_vocab
        self.n_embed = n_embed
        self.noise_dist = noise_dist

        # define embedding layers for input and output words
        self.in_embed = nn.Embedding(n_vocab, n_embed)
        self.out_embed = nn.Embedding(n_vocab, n_embed)

        # initialize both embedding tables with uniform weights
        self.in_embed.weight.data.uniform_(-1, 1)
        self.out_embed.weight.data.uniform_(-1, 1)
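    # The gist cuts off here; a minimal sketch of the forward methods this model
    # needs to pair with NegativeSamplingLoss, assuming a uniform noise
    # distribution whenever noise_dist is None.
    def forward_input(self, input_words):
        return self.in_embed(input_words)

    def forward_output(self, output_words):
        return self.out_embed(output_words)

    def forward_noise(self, batch_size, n_samples):
        """Sample noise words and return their output-embedding vectors."""
        noise_dist = self.noise_dist if self.noise_dist is not None else torch.ones(self.n_vocab)
        noise_words = torch.multinomial(noise_dist, batch_size * n_samples, replacement=True)
        return self.out_embed(noise_words).view(batch_size, n_samples, self.n_embed)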
class SkipGram(nn.Module):
    def __init__(self, n_vocab, n_embed):
        super().__init__()

        # complete this SkipGram model
        self.embed = nn.Embedding(n_vocab, n_embed)
        self.output = nn.Linear(n_embed, n_vocab)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        # embedding -> linear -> log-softmax over the vocabulary
        x = self.embed(x)
        scores = self.output(x)
        return self.log_softmax(scores)
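# Illustrative usage with made-up sizes: log-probabilities over the vocabulary
# for a batch of input word ids (pairs with nn.NLLLoss for training).
model = SkipGram(n_vocab=5000, n_embed=300)
log_ps = model(torch.LongTensor([2, 17, 42]))
print(log_ps.shape)  # torch.Size([3, 5000])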
def get_batches(words, batch_size, window_size=5):
    ''' Create a generator of word batches as a tuple (inputs, targets) '''
    n_batches = len(words)//batch_size
    # only full batches
    words = words[:n_batches*batch_size]
    for idx in range(0, len(words), batch_size):
        x, y = [], []
        batch = words[idx:idx + batch_size]
        for ii in range(len(batch)):
            batch_x = batch[ii]
            batch_y = get_target(batch, ii, window_size)  # helper sketched below
            y.extend(batch_y)
            x.extend([batch_x] * len(batch_y))
        yield x, y
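# get_target is not shown in the gist; a minimal sketch of the helper assumed
# above, which grabs a randomly sized window of words around position idx:
import random

def get_target(words, idx, window_size=5):
    R = random.randint(1, window_size)
    start = max(idx - R, 0)
    stop = idx + R
    # everything in the window except the word at idx itself
    return words[start:idx] + words[idx + 1:stop + 1]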