Ankit Choudhary frenzy2106

Head of Competitions at Analytics Vidhya (https://datahack.analyticsvidhya.com/contest/all/)

frenzy2106 / model_training.py

Created March 17, 2020 11:05

	# Configure training: Adam optimizer, categorical cross-entropy loss,
	# and accuracy as the reported metric.
	model.compile(
		optimizer='adam',
		loss='categorical_crossentropy',
		metrics=['accuracy'],
	)

	# Fit for 10 epochs, passing (X_test, y_test) as the validation data.
	model.fit(
		X_train,
		y_train,
		epochs=10,
		validation_data=(X_test, y_test),
	)

frenzy2106 / make_predictions.py

Created March 17, 2020 11:13

	# unzip the test file to read images
	# NOTE: the leading "!" is IPython/Jupyter shell-escape syntax, so this line
	# only runs inside a notebook. The /content/drive path looks like a mounted
	# Google Drive in Colab -- confirm before reusing elsewhere.
	!unzip /content/drive/My\ Drive/test_ScVgIM0.zip

	# Read test file names
	# (assumes test.csv is in the working directory and that `pd` is pandas,
	# imported elsewhere -- TODO confirm)
	test = pd.read_csv('test.csv')
	# Keep a separate copy of the table alongside `test`.
	test_copy = test.copy()

	# Read test images and preprocess
	# `test_image` starts empty; presumably the loop below fills it.
	test_image = []
	# One iteration per row of `test`, wrapped in tqdm (presumably a progress bar).
	# NOTE(review): the loop body is not visible in this excerpt.
	for i in tqdm(range(test.shape[0])):

frenzy2106 / import_libraries_nlp.py

Created March 18, 2020 08:42

	# TensorFlow and tf.keras
	import tensorflow as tf
	from tensorflow import keras

	# Import numpy, Pandas, Regex & Visualisation libraries
	import numpy as np
	import pandas as pd
	import re
	from matplotlib import pyplot as plt

frenzy2106 / read_file.py

Created March 18, 2020 09:17

# Load the CSV into `tweets`. The absolute /content path suggests a
# Colab-style runtime -- adjust it when running elsewhere.
tweets = pd.read_csv('/content/train_E6oV3lV.csv')

frenzy2106 / text_cleaning_preprocessing.py

Last active March 18, 2020 09:22

	# Clean raw tweet strings: strip t.co links, lowercase, and expand a few
	# shorthand spellings. `text` is read as text[0] .. text[len(text) - 1].
	# NOTE(review): this excerpt ends after the "ur" substitution; nothing visible
	# here appends to `corpus` or returns it, and the nested indentation was lost.
	def clean_corpus(text):
	corpus = []
	for i in range(len(text)):
	# Replace a t.co link at the very start of the tweet (followed by whitespace)
	# with a single space; str() coerces non-string entries first.
	# NOTE(review): the "." in "t.co" is unescaped, so it matches any character.
	tweet = re.sub(r"^https://t.co/[a-zA-Z0-9]*\s"," ", str(text[i]))
	# Replace a t.co link that sits between whitespace inside the tweet.
	tweet = re.sub(r"\s+https://t.co/[a-zA-Z0-9]*\s"," ", tweet)
	# Replace a t.co link (and the whitespace before it) at the end of the tweet.
	tweet = re.sub(r"\s+https://t.co/[a-zA-Z0-9]*$"," ", tweet)
	# Lowercasing happens after link removal, so the patterns below see lowercase text.
	tweet = tweet.lower()
	# Expand shorthand spellings.
	# NOTE(review): "hv" and "ur" carry no word boundaries, so they also fire inside
	# longer words (e.g. "your" becomes "yoyour") -- consider \b anchors.
	tweet = re.sub(r"can't","can not", tweet)
	tweet = re.sub(r"hv","have", tweet)
	tweet = re.sub(r"ur","your", tweet)

frenzy2106 / separate_label_Input.py

Created March 18, 2020 09:20

	# Split the `tweets` table by column position, taking `.values` of each column:
	# X comes from column index 2 and y from column index 1.
	# Presumably X is the tweet text and y the label -- verify against the CSV header.
	X = tweets.iloc[:, 2].values
	y = tweets.iloc[:,1].values

frenzy2106 / tokenization.py

Created March 18, 2020 09:24

	# Count how often each distinct token occurs across every document in the corpus.
	word_dict = {}
	for doc in corpus:
		words = nltk.word_tokenize(doc)
		for word in words:
			# A token seen for the first time starts from zero before the increment.
			word_dict[word] = word_dict.get(word, 0) + 1

frenzy2106 / padding_text.py

Created March 18, 2020 09:28

	# Print the first entry of `corpus` and the first two entries of
	# `corpus_tokens` as a quick visual check.
	print(corpus[0])
	print(corpus_tokens[:2])

	# Length of every entry in `corpus_tokens`, then the mean of those lengths.
	num_of_words_in_doc = [len(doc) for doc in corpus_tokens]
	print("Average number of words: ", np.average(num_of_words_in_doc))

frenzy2106 / validation_set.py

Created March 18, 2020 09:30

	# Hold out 20% of `corpus_pad` / `y` as the validation split; the fixed
	# random_state keeps the split reproducible between runs.
	X_train, X_test, y_train, y_test = train_test_split(
		corpus_pad,
		y,
		test_size=0.2,
		random_state=101,
	)

	# Notebook-style display of the two resulting shapes.
	(X_train.shape, X_test.shape)

frenzy2106 / build_compile_model.py

Created March 18, 2020 09:31

	# Building & Compiling the model

	# Embedding input size is one more than the tokenizer's word-index length
	# (presumably because index 0 is reserved for padding -- confirm).
	vocab_size = len(tokenizer.word_index) + 1
	# Sequence length passed to the embedding layer as input_length.
	max_length = 25

	# Stack: Embedding (50-dim) -> LSTM (50 units, dropout 0.2) -> one sigmoid unit.
	model = keras.Sequential([
		keras.layers.Embedding(
			input_dim=vocab_size,
			output_dim=50,
			input_length=max_length,
		),
		keras.layers.LSTM(units=50, dropout=0.2, recurrent_dropout=0.2),
		keras.layers.Dense(units=1, activation='sigmoid'),
	])

	# compile the model