# Instantiate the model w/ hyperparams
vocab_size = len(vocab_to_int) + 1  # +1 for the 0 padding token
output_size = 1
embedding_dim = 200
hidden_dim = 256
n_layers = 2

net = SentimentRNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
import torch.nn as nn

class SentimentRNN(nn.Module):
    """
    The RNN model that will be used to perform sentiment analysis.
    """
    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        """
        Initialize the model by setting up the layers.
        """
        super().__init__()
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        # embedding -> LSTM -> dropout -> linear -> sigmoid
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()
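    # The gist cuts off after the layer setup; below is a minimal sketch of the
    # rest of the class (forward pass and hidden-state init), assuming the layer
    # names defined in __init__ above.
    def forward(self, x, hidden):
        """Forward pass: embedding -> LSTM -> dropout -> fc -> sigmoid."""
        batch_size = x.size(0)
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        out = self.sig(self.fc(self.dropout(lstm_out)))
        # keep only the sigmoid output for the last time step of each sequence
        out = out.view(batch_size, -1)[:, -1]
        return out, hidden

    def init_hidden(self, batch_size):
        """Create two new zeroed tensors for the LSTM hidden and cell state."""
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
        return hidden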
import torch
from torch.utils.data import TensorDataset, DataLoader

# create Tensor datasets
train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
valid_data = TensorDataset(torch.from_numpy(valid_x), torch.from_numpy(valid_y))
test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))

# dataloaders
batch_size = 50

train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)
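# A quick sanity check on one batch; the printed sizes are illustrative and
# assume the batch_size above and whatever seq_length was used for padding.
dataiter = iter(train_loader)
sample_x, sample_y = next(dataiter)
print('Sample input size: ', sample_x.size())  # torch.Size([50, seq_length])
print('Sample label size: ', sample_y.size())  # torch.Size([50])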
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20

split_frac = 0.8

## split data into training, validation, and test data (features and labels, x and y)
train_x, test_x, train_y, test_y = train_test_split(features, encoded_labels, test_size=1 - split_frac)
## split the held-out 20% evenly into validation and test sets
test_x, valid_x, test_y, valid_y = train_test_split(test_x, test_y, test_size=0.5)

## print out the shapes of your resultant feature data
print(train_x.shape, test_x.shape, valid_x.shape)
import numpy as np

def pad_features(reviews_ints, seq_length):
    ''' Return features of reviews_ints, where each review is padded with 0's
        or truncated to the input seq_length.
    '''
    features = []
    ## implement function
    for review in reviews_ints:
        if len(review) < seq_length:
            # left-pad short reviews with zeros
            features.append(list(np.zeros(seq_length - len(review), dtype=int)) + review)
        else:
            # truncate long reviews to the first seq_length tokens
            features.append(review[:seq_length])
    return np.array(features, dtype=int)
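# Illustrative usage with made-up token ids: a short review is left-padded
# and a long one is truncated to seq_length.
sample = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
print(pad_features(sample, seq_length=5))
# [[0 0 1 2 3]
#  [4 5 6 7 8]]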
# feel free to use this import
from collections import Counter

temp = Counter(words)
temp = temp.most_common()

## Build a dictionary that maps words to integers
vocab_to_int = {}
i = 1  # start at 1 so that 0 stays reserved for padding
for pair in temp:
    vocab_to_int[pair[0]] = i
    i += 1
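# With the mapping built, encoding text is one lookup per token. A hedged
# sketch, assuming reviews_split holds the reviews as whitespace-separated strings:
reviews_ints = []
for review in reviews_split:
    reviews_ints.append([vocab_to_int[word] for word in review.split()])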
class NegativeSamplingLoss(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input_vectors, output_vectors, noise_vectors):
        batch_size, embed_size = input_vectors.shape
        # Input vectors should be a batch of column vectors
        input_vectors = input_vectors.view(batch_size, embed_size, 1)
        # Output vectors should be a batch of row vectors
        output_vectors = output_vectors.view(batch_size, 1, embed_size)
        # log-sigmoid loss for the true pairs, and for the negated noise vectors
        out_loss = torch.bmm(output_vectors, input_vectors).sigmoid().log().squeeze()
        noise_loss = torch.bmm(noise_vectors.neg(), input_vectors).sigmoid().log().squeeze().sum(1)
        # negate and average over the batch
        return -(out_loss + noise_loss).mean()
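# A quick shape check with random tensors (the sizes here are made up):
criterion = NegativeSamplingLoss()
inp, out = torch.randn(16, 300), torch.randn(16, 300)
noise = torch.randn(16, 5, 300)  # 5 noise words per example
print(criterion(inp, out, noise))  # scalar loss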
class SkipGramNeg(nn.Module):
    def __init__(self, n_vocab, n_embed, noise_dist=None):
        super().__init__()

        self.n_vocab = n_vocab
        self.n_embed = n_embed
        self.noise_dist = noise_dist

        # define embedding layers for input and output words
        self.in_embed = nn.Embedding(n_vocab, n_embed)
        self.out_embed = nn.Embedding(n_vocab, n_embed)

        # initialize both embedding tables with uniform weights
        self.in_embed.weight.data.uniform_(-1, 1)
        self.out_embed.weight.data.uniform_(-1, 1)
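    # The gist cuts off here; a minimal sketch of the forward methods this model
    # needs to pair with NegativeSamplingLoss, assuming a uniform noise
    # distribution whenever noise_dist is None.
    def forward_input(self, input_words):
        return self.in_embed(input_words)

    def forward_output(self, output_words):
        return self.out_embed(output_words)

    def forward_noise(self, batch_size, n_samples):
        """Sample noise words and return their output-embedding vectors."""
        noise_dist = self.noise_dist if self.noise_dist is not None else torch.ones(self.n_vocab)
        noise_words = torch.multinomial(noise_dist, batch_size * n_samples, replacement=True)
        return self.out_embed(noise_words).view(batch_size, n_samples, self.n_embed)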
class SkipGram(nn.Module):
    def __init__(self, n_vocab, n_embed):
        super().__init__()

        # complete this SkipGram model
        self.embed = nn.Embedding(n_vocab, n_embed)
        self.output = nn.Linear(n_embed, n_vocab)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        # embedding -> linear -> log-softmax over the vocabulary
        x = self.embed(x)
        scores = self.output(x)
        return self.log_softmax(scores)
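# Illustrative usage with made-up sizes: log-probabilities over the vocabulary
# for a batch of input word ids (pairs with nn.NLLLoss for training).
model = SkipGram(n_vocab=5000, n_embed=300)
log_ps = model(torch.LongTensor([2, 17, 42]))
print(log_ps.shape)  # torch.Size([3, 5000])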
def get_batches(words, batch_size, window_size=5):
    ''' Create a generator of word batches as a tuple (inputs, targets) '''
    n_batches = len(words)//batch_size
    # only full batches
    words = words[:n_batches*batch_size]
    for idx in range(0, len(words), batch_size):
        x, y = [], []
        batch = words[idx:idx + batch_size]
        for ii in range(len(batch)):
            batch_x = batch[ii]
            batch_y = get_target(batch, ii, window_size)  # helper sketched below
            y.extend(batch_y)
            x.extend([batch_x] * len(batch_y))
        yield x, y
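# get_target is not shown in the gist; a minimal sketch of the helper assumed
# above, which grabs a randomly sized window of words around position idx:
import random

def get_target(words, idx, window_size=5):
    R = random.randint(1, window_size)
    start = max(idx - R, 0)
    stop = idx + R
    # everything in the window except the word at idx itself
    return words[start:idx] + words[idx + 1:stop + 1]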