Ravish Chawla ravishchawla

I am a graduate student in Machine Learning at the Georgia Institute of Technology, studying Data Analytics, Visualization, and Engineering.

ravishchawla / medium_lstm_test_9.py

Last active June 27, 2018 21:38

	# Ask the trained model for a predicted class per test sample.
	predictions = model.predict_classes(X_test)

	# Convert Y_test to the same format as predictions: a row whose first
	# element is 1 becomes class 0, every other row becomes class 1.
	# (Assumes Y_test rows are two-element one-hot vectors -- TODO confirm.)
	actuals = [0 if y[0] == 1 else 1 for y in Y_test]

	# scikit-learn documents the signature as accuracy_score(y_true, y_pred),
	# so the ground truth is passed first and the predictions second.
	accuracy_score(actuals, predictions)

ravishchawla / medium_lstm_glove_8.py

Last active June 27, 2018 19:58

	# Map each token in the GloVe file to its coefficient vector. Every line
	# of the file is split on whitespace: the first field is the token and
	# the remaining fields are parsed as float32 values.
	embeddings_index = {}
	# Decode explicitly as UTF-8 instead of relying on the platform default
	# encoding, which differs between systems and may fail on any non-ASCII
	# tokens in the file.
	with open('data/glove.6B.100d.txt', encoding='utf-8') as f:
	    for line in f:
	        values = line.split()
	        word = values[0]
	        coefs = np.asarray(values[1:], dtype='float32')
	        embeddings_index[word] = coefs

	# Zero-initialised matrix with one 100-wide row per entry of sequence_dict.
	vocab_size = len(sequence_dict)
	embeddings_matrix = np.zeros((vocab_size, 100))

ravishchawla / model_lstm_model_7.py

Created June 27, 2018 19:36

	# Two stacked LSTM layers with dropout, followed by a ReLU dense layer
	# and a 2-unit softmax output.
	model = Sequential()
	# Keras documents Embedding's input_dim as "maximum integer index + 1".
	# Assuming word_dict is keyed by the Keras Tokenizer's 1-based word
	# indices (TODO confirm), the largest index equals len(word_dict), so the
	# table needs one extra row. max_cap is used both as the embedding width
	# and as the input sequence length.
	model.add(Embedding(len(word_dict) + 1, max_cap, input_length=max_cap))
	# return_sequences=True hands the full sequence to the second LSTM.
	model.add(LSTM(60, return_sequences=True, recurrent_dropout=0.5))
	model.add(Dropout(0.5))
	model.add(LSTM(60, recurrent_dropout=0.5))
	model.add(Dense(60, activation='relu'))
	model.add(Dense(2, activation='softmax'))
	# summary() prints the table itself and returns None, so wrapping it in
	# print() only added a stray "None" line to the output.
	model.summary()

	optimizer = Adam(lr=0.01, decay=0.001)

ravishchawla / medium_lstm_model_5.py

Last active June 27, 2018 19:17

	# Two stacked 100-unit LSTM layers, followed by a ReLU dense layer and a
	# 2-unit softmax output.
	model = Sequential()
	# Keras documents Embedding's input_dim as "maximum integer index + 1".
	# Assuming word_dict is keyed by the Keras Tokenizer's 1-based word
	# indices (TODO confirm), the largest index equals len(word_dict), so the
	# table needs one extra row. max_cap is used both as the embedding width
	# and as the input sequence length.
	model.add(Embedding(len(word_dict) + 1, max_cap, input_length=max_cap))
	# return_sequences=True hands the full sequence to the second LSTM.
	model.add(LSTM(100, return_sequences=True))
	model.add(LSTM(100))
	model.add(Dense(100, activation='relu'))
	model.add(Dense(2, activation='softmax'))
	# summary() prints the table itself and returns None, so wrapping it in
	# print() only added a stray "None" line to the output.
	model.summary()

	optimizer = Adam(lr=0.001, decay=0.0001)
	# NOTE(review): 'categorical_crossentropy' is the conventional pairing for
	# a softmax output with one-hot targets -- confirm before changing the loss.
	model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

ravishchawla / medium_lstm_pad_4.py

Created June 27, 2018 19:03

	# Bring every encoded review to a length of 60: reviews that run longer
	# lose their tail ('post' truncation) and pad_sequences pads the rest.
	max_cap = 60
	X = pad_sequences(
	    reviews_encoded,
	    maxlen=max_cap,
	    truncating='post',
	)

	# Two-column label array: [0, 1] when the label contains the character
	# '0', [1, 0] for every other label.
	Y = np.array([[1, 0] if '0' not in label else [0, 1] for label in labels])

	# Seed NumPy's global generator, then lay out the positions
	# 0 .. len(X) - 1 that are used to shuffle the reviews.
	np.random.seed(1024)
	random_posits = np.arange(len(X))

ravishchawla / medium_lstm_encode_3.py

Last active June 27, 2018 18:59

	# Fit a Keras Tokenizer on the sentences, then encode those same
	# sentences as sequences of integers.
	tokenizer = Tokenizer()
	tokenizer.fit_on_texts(sentences)
	text_sequences = np.array(
	    tokenizer.texts_to_sequences(sentences)
	)

	# sequence_dict is the tokenizer's word index; word_dict is the same
	# mapping with keys and values swapped.
	sequence_dict = tokenizer.word_index
	word_dict = {code: token for token, code in sequence_dict.items()}

	# We get a map of encoding-to-word in sequence_dict

ravishchawla / medium_lstm_clean_2.py

Created June 27, 2018 18:42

	'''
	Clean each document by removing unnecessary characters and splitting by space.
	'''
	def clean_document(doco):
	punctuation = string.punctuation + '\n\n';
	punc_replace = ''.join([' ' for s in punctuation]);
	doco_clean = doco.replace('-', ' ');
	doco_alphas = re.sub(r'\W +', '', doco_clean)
	trans_table = str.maketrans(punctuation, punc_replace);
	doco_clean = ' '.join([word.translate(trans_table) for word in doco_alphas.split(' ')]);

ravishchawla / medium_lstm_load_1.py

Last active June 27, 2018 18:33

	'''
	Read reviews from a JSON-formatted file into an array.
	'''
	# Start with no collected records and zeroed counters; the read loop
	# stops once len(lines) reaches num_total * 2.
	lines = []
	num_total = 75000
	num_pos = 0
	num_neg = 0

	with open('data/review.json', 'r') as f:
	for line in f:
	if (len(lines) >= (num_total * 2)):
	break;

ravishchawla / prbot.py

Last active November 15, 2017 20:11

prbot

	import time;
	import datetime;
	import requests;
	import json;
	import os;
	import os.path;
	import time;

	'''
	Bot to post Pull Request changes to a Slack channel.

ravishchawla / word_to_vec.py

Created September 12, 2017 16:07

	# coding: utf-8

	# # Training a Word2Vec Model on the Reddit Comments Dataset
	#
	# ### Ravish Chawla

	# In[276]:

	# Enable inline matplotlib output. InteractiveShell.magic() is deprecated
	# in IPython; run_line_magic(name, args) is the supported replacement.
	get_ipython().run_line_magic('matplotlib', 'inline')