This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_lexicon():
    """Build a token lexicon from the positive and negative sample files.

    Reads 'pos.txt' and 'neg.txt' from the current working directory,
    lowercases each line, tokenizes it with nltk's word_tokenize, and
    accumulates every token into one flat list.

    Returns:
        list[str]: all tokens from both files (duplicates included).
    """
    lexicon = []
    # Positive examples: one sample sentence per line.
    with open('pos.txt', 'r') as f:
        for line in f.readlines():
            # Lowercase before tokenizing so the lexicon is case-insensitive.
            lexicon += word_tokenize(line.lower())
    # Repeat the same process with the negative examples (this half of the
    # function was truncated in this capture; reconstructed from the
    # author's own comment and the pos.txt loop above).
    with open('neg.txt', 'r') as f:
        for line in f.readlines():
            lexicon += word_tokenize(line.lower())
    # NOTE(review): the original likely lemmatized and frequency-filtered
    # this list before returning — TODO confirm against the full source.
    # Here the raw token list is returned.
    return lexicon
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools | |
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null | |
!apt-get update -qq 2>&1 > /dev/null | |
!apt-get -y install -qq google-drive-ocamlfuse fuse | |
from google.colab import auth | |
auth.authenticate_user() | |
from oauth2client.client import GoogleCredentials | |
creds = GoogleCredentials.get_application_default() | |
import getpass | |
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!mkdir -p drive | |
!google-drive-ocamlfuse drive | |
import os | |
os.chdir('drive/google_colab/sentiment classification on 10k samples') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf
import numpy as np
import nltk
# Download only the NLTK resources this notebook actually uses:
# 'punkt' for word_tokenize and 'wordnet' for WordNetLemmatizer.
# The original bare nltk.download() opens an interactive downloader,
# which blocks forever in a headless Colab session.
nltk.download('punkt')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import random
import pickle
from collections import Counter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
lemmatizer = WordNetLemmatizer()  # single shared lemmatizer, reused by feature_vectors
hm_lines = 100000  # maximum number of lines processed per input file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def feature_vectors(input_file_name, lexicon, classification):
    """Turn each line of *input_file_name* into a (features, label) pair.

    Args:
        input_file_name: path of the text file to read, one sample per line.
        lexicon: vocabulary list produced by create_lexicon().
        classification: one-hot label for every sample in this file,
            e.g. [1, 0] for positive and [0, 1] for negative.

    Returns:
        list: the accumulated featureset (consumed via `+=` by
        create_feature_sets_and_labels below).
    """
    featureset = []
    with open(input_file_name, 'r') as f:
        contents = f.readlines()
        # hm_lines (module-level constant) caps how many samples we process.
        for l in contents[:hm_lines]:
            # Lowercase + tokenize, then lemmatize each token so inflected
            # forms map onto the same lexicon entry.
            current_words = word_tokenize(l.lower())
            current_words = [lemmatizer.lemmatize(i) for i in current_words]
            # NOTE(review): the rest of this loop — building the
            # bag-of-words vector against `lexicon` and appending
            # [features, classification] to featureset — was truncated in
            # this capture; restore it from the full source.
    return featureset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_feature_sets_and_labels(test_size=0.1):
    """Build shuffled (features, label) pairs from pos.txt and neg.txt.

    Args:
        test_size: fraction of the data reserved for testing (default 0.1).
    """
    lexicon = create_lexicon()
    features = []
    features += feature_vectors('pos.txt', lexicon, [1, 0])
    # BUG FIX: the original called feature_vector (missing 's'), a name
    # that does not exist; the function defined above is feature_vectors.
    features += feature_vectors('neg.txt', lexicon, [0, 1])
    random.shuffle(features)  # decorrelate positive and negative samples
    features = np.array(features)
    testing_size = int(test_size * len(features))  # e.g. 10% held out
    # NOTE(review): the train/test slicing and the return statement were
    # truncated in this capture — restore them from the full source.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# extract features from each photo in the directory
def extract_features(directory):
    """Run every image in *directory* through the Encoder and collect the
    resulting feature vectors keyed by filename.
    """
    # Load the encoder model and switch it to inference mode.
    model = Encoder()
    # model.to(device)  # left disabled in the original
    model.eval()
    features = dict()
    for i, name in enumerate(listdir(directory)):
        # NOTE(review): the loop body — loading each image file and
        # storing the model output under features[name] — was truncated
        # in this capture; restore it from the full source.
        pass
    return features
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Encoder(nn.Module):
    """Encodes the input image to a vector."""

    def __init__(self):
        super(Encoder, self).__init__()
        # Start from an ImageNet-pretrained VGG-16 and rebuild a
        # truncated copy of it in `model`.
        vgg = models.vgg16(pretrained=True)
        model = torch.nn.Sequential()
        # NOTE(review): the remainder of __init__ — copying vgg layers
        # into `model` and registering it on self — was truncated in
        # this capture; restore it from the full source.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Encoder(nn.Module):
    """Encodes the input image to a vector.

    NOTE(review): this snippet is a byte-for-byte duplicate of the
    Encoder definition above it in this capture — likely a scrape
    artifact; keep only one copy in the real source.
    """

    def __init__(self):
        super(Encoder, self).__init__()
        # Start from an ImageNet-pretrained VGG-16 and rebuild a
        # truncated copy of it in `model`.
        vgg = models.vgg16(pretrained=True)
        model = torch.nn.Sequential()
        # NOTE(review): the remainder of __init__ — copying vgg layers
        # into `model` and registering it on self — was truncated in
        # this capture; restore it from the full source.
OlderNewer