This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_lexicon():
    """Build a token lexicon from the positive and negative sample files.

    Reads 'pos.txt' and 'neg.txt' from the current working directory,
    lowercases each line, tokenizes it with nltk's word_tokenize, and
    accumulates every token into one flat list.

    Returns:
        list[str]: all tokens from both files (duplicates included).
    """
    lexicon = []
    # Positive examples: one sample sentence per line.
    with open('pos.txt', 'r') as f:
        for line in f.readlines():
            # Lowercase before tokenizing so the lexicon is case-insensitive.
            lexicon += word_tokenize(line.lower())
    # Repeat the same process with the negative examples (this half of the
    # function was truncated in this capture; reconstructed from the
    # author's own comment and the pos.txt loop above).
    with open('neg.txt', 'r') as f:
        for line in f.readlines():
            lexicon += word_tokenize(line.lower())
    # NOTE(review): the original likely lemmatized and frequency-filtered
    # this list before returning — TODO confirm against the full source.
    # Here the raw token list is returned.
    return lexicon
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools | |
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null | |
!apt-get update -qq 2>&1 > /dev/null | |
!apt-get -y install -qq google-drive-ocamlfuse fuse | |
from google.colab import auth | |
auth.authenticate_user() | |
from oauth2client.client import GoogleCredentials | |
creds = GoogleCredentials.get_application_default() | |
import getpass | |
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!mkdir -p drive | |
!google-drive-ocamlfuse drive | |
import os | |
os.chdir('drive/google_colab/sentiment classification on 10k samples') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf
import numpy as np
import nltk
# Download only the NLTK resources this notebook actually uses:
# 'punkt' for word_tokenize and 'wordnet' for WordNetLemmatizer.
# The original bare nltk.download() opens an interactive downloader,
# which blocks forever in a headless Colab session.
nltk.download('punkt')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import random
import pickle
from collections import Counter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
lemmatizer = WordNetLemmatizer()  # single shared lemmatizer, reused by feature_vectors
hm_lines = 100000  # maximum number of lines processed per input file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def feature_vectors(input_file_name, lexicon, classification):
    """Turn each line of *input_file_name* into a (features, label) pair.

    Args:
        input_file_name: path of the text file to read, one sample per line.
        lexicon: vocabulary list produced by create_lexicon().
        classification: one-hot label for every sample in this file,
            e.g. [1, 0] for positive and [0, 1] for negative.

    Returns:
        list: the accumulated featureset (consumed via `+=` by
        create_feature_sets_and_labels below).
    """
    featureset = []
    with open(input_file_name, 'r') as f:
        contents = f.readlines()
        # hm_lines (module-level constant) caps how many samples we process.
        for l in contents[:hm_lines]:
            # Lowercase + tokenize, then lemmatize each token so inflected
            # forms map onto the same lexicon entry.
            current_words = word_tokenize(l.lower())
            current_words = [lemmatizer.lemmatize(i) for i in current_words]
            # NOTE(review): the rest of this loop — building the
            # bag-of-words vector against `lexicon` and appending
            # [features, classification] to featureset — was truncated in
            # this capture; restore it from the full source.
    return featureset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_feature_sets_and_labels(test_size=0.1):
    """Build shuffled (features, label) pairs from pos.txt and neg.txt.

    Args:
        test_size: fraction of the data reserved for testing (default 0.1).
    """
    lexicon = create_lexicon()
    features = []
    features += feature_vectors('pos.txt', lexicon, [1, 0])
    # BUG FIX: the original called feature_vector (missing 's'), a name
    # that does not exist; the function defined above is feature_vectors.
    features += feature_vectors('neg.txt', lexicon, [0, 1])
    random.shuffle(features)  # decorrelate positive and negative samples
    features = np.array(features)
    testing_size = int(test_size * len(features))  # e.g. 10% held out
    # NOTE(review): the train/test slicing and the return statement were
    # truncated in this capture — restore them from the full source.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# extract features from each photo in the directory
def extract_features(directory):
    """Run every image in *directory* through the Encoder and collect the
    resulting feature vectors keyed by filename.
    """
    # Load the encoder model and switch it to inference mode.
    model = Encoder()
    # model.to(device)  # left disabled in the original
    model.eval()
    features = dict()
    for i, name in enumerate(listdir(directory)):
        # NOTE(review): the loop body — loading each image file and
        # storing the model output under features[name] — was truncated
        # in this capture; restore it from the full source.
        pass
    return features
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Encoder(nn.Module):
    """Encodes the input image to a vector."""

    def __init__(self):
        super(Encoder, self).__init__()
        # Start from an ImageNet-pretrained VGG-16 and rebuild a
        # truncated copy of it in `model`.
        vgg = models.vgg16(pretrained=True)
        model = torch.nn.Sequential()
        # NOTE(review): the remainder of __init__ — copying vgg layers
        # into `model` and registering it on self — was truncated in
        # this capture; restore it from the full source.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Encoder(nn.Module):
    """Encodes the input image to a vector.

    NOTE(review): this snippet is a byte-for-byte duplicate of the
    Encoder definition above it in this capture — likely a scrape
    artifact; keep only one copy in the real source.
    """

    def __init__(self):
        super(Encoder, self).__init__()
        # Start from an ImageNet-pretrained VGG-16 and rebuild a
        # truncated copy of it in `model`.
        vgg = models.vgg16(pretrained=True)
        model = torch.nn.Sequential()
        # NOTE(review): the remainder of __init__ — copying vgg layers
        # into `model` and registering it on self — was truncated in
        # this capture; restore it from the full source.
OlderNewer