Skip to content

Instantly share code, notes, and snippets.

View frenzy2106's full-sized avatar

Ankit Choudhary frenzy2106

View GitHub Profile
# Compile and train the classifier: Adam optimizer, categorical cross-entropy
# (assumes one-hot encoded targets — TODO confirm y_train encoding), tracking accuracy.
model.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'])
# NOTE(review): the test split is reused as validation_data, so it is not a
# clean holdout — metrics reported per epoch are on the same data used later
# for evaluation; confirm this is intentional.
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))
# unzip the test file to read images
# NOTE: "!" is an IPython/Colab shell escape — this line only runs in a notebook.
!unzip /content/drive/My\ Drive/test_ScVgIM0.zip
# Read test file names
test = pd.read_csv('test.csv')
# Keep an untouched copy — presumably for building the submission file later; TODO confirm.
test_copy = test.copy()
# Read test images and preprocess
test_image = []
# NOTE(review): the loop body is missing from this paste — each iteration
# presumably loads one test image and appends it to test_image; confirm
# against the original notebook before relying on this fragment.
for i in tqdm(range(test.shape[0])):
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
# Import numpy, Pandas, Regex & Visualisation libraries
import numpy as np
import pandas as pd
import re
from matplotlib import pyplot as plt
# Load the labelled tweets dataset — presumably columns are (id, label, tweet
# text), given the positional iloc indexing used further down; TODO confirm schema.
tweets = pd.read_csv('/content/train_E6oV3lV.csv')
# Clean raw tweet text: strip t.co links, lowercase, and expand a few
# informal abbreviations.
# NOTE(review): this function is truncated in the paste — the cleaned tweet
# is never appended to `corpus` and there is no return statement; the
# original presumably ends with corpus.append(tweet) / return corpus.
# Confirm against the full source before use.
def clean_corpus(text):
corpus = []
for i in range(len(text)):
# Remove t.co links at the start, middle, and end of the tweet.
tweet = re.sub(r"^https://t.co/[a-zA-Z0-9]*\s"," ", str(text[i]))
tweet = re.sub(r"\s+https://t.co/[a-zA-Z0-9]*\s"," ", tweet)
tweet = re.sub(r"\s+https://t.co/[a-zA-Z0-9]*$"," ", tweet)
# Normalize case before abbreviation expansion.
tweet = tweet.lower()
# Expand common shorthand.
# NOTE(review): these patterns are unanchored — "hv"/"ur" will also match
# inside longer words (e.g. "hvac", "your" -> "yoyour"); verify intent.
tweet = re.sub(r"can't","can not", tweet)
tweet = re.sub(r"hv","have", tweet)
tweet = re.sub(r"ur","your", tweet)
# Features (X): third column — presumably the raw tweet text; target (y):
# second column — presumably the sentiment label. TODO confirm column order.
X = tweets.iloc[:, 2].values
y = tweets.iloc[:,1].values
# Build a corpus-wide vocabulary frequency table: token -> occurrence count.
# Idiom fix: dict.get with a default replaces the explicit membership
# check + if/else branches of the original.
word_dict = {}
for doc in corpus:
    # nltk.word_tokenize is imported elsewhere in the notebook.
    for word in nltk.word_tokenize(doc):
        word_dict[word] = word_dict.get(word, 0) + 1
# Finding the average number of tokens per document in the corpus.
# Spot-check the first cleaned document and first two tokenized documents.
print(corpus[0])
print(corpus_tokens[0:2])
# Comprehension instead of the original append loop (one count per document).
num_of_words_in_doc = [len(doc) for doc in corpus_tokens]
print("Average number of words: ", np.average(num_of_words_in_doc))
# Creating Validation Set
# 80/20 split of the padded sequences; fixed random_state for reproducibility.
# NOTE: train_test_split comes from sklearn.model_selection (imported elsewhere).
X_train,X_test,y_train,y_test = train_test_split(corpus_pad,y,test_size=0.2,random_state=101)
# Sanity-check the resulting split shapes (notebook cell output).
X_train.shape, X_test.shape
# Building & Compiling the model
# Binary tweet classifier: Embedding -> LSTM -> single sigmoid unit.
vocab_size = len(tokenizer.word_index) + 1  # +1 reserves index 0 for padding
max_length = 25

# Passing the layer list to the Sequential constructor is equivalent to
# calling model.add() once per layer.
model = keras.Sequential([
    keras.layers.Embedding(input_dim=vocab_size, output_dim=50, input_length=max_length),
    keras.layers.LSTM(units=50, dropout=0.2, recurrent_dropout=0.2),
    keras.layers.Dense(units=1, activation='sigmoid'),
])
# compile the model