# importing stop words from the NLTK library
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')
stopwords_default = stopwords.words('english')
print(len(stopwords_default))

# extending the default list with custom stop words
stopwords_default.extend(['like', 'marvel', 'ghost'])
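As a quick check, the extended list can be used to filter a tokenized sentence. This is a minimal sketch; the sample sentence is made up for illustration, and it assumes the NLTK 'punkt' tokenizer data is available:

from nltk.tokenize import word_tokenize
nltk.download('punkt')

sample = "We would like to watch a marvel movie about a ghost."
filtered = [w for w in word_tokenize(sample) if w.lower() not in stopwords_default]
# 'like', 'marvel' and 'ghost' are dropped along with the default stop words
print(filtered)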
from gensim.parsing.preprocessing import remove_stopwords
from nltk.tokenize import word_tokenize

sample_text = "Oh man, this is pretty cool. We will do more such things."
sample_text_NSW = remove_stopwords(sample_text)
print(word_tokenize(sample_text))
print(word_tokenize(sample_text_NSW))
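Note that gensim's remove_stopwords takes and returns a plain string, so no prior tokenization is needed. gensim also exposes its stop word list as a frozenset, which is handy for token-level filtering; a minimal sketch (the exact set size varies by gensim version):

from gensim.parsing.preprocessing import STOPWORDS

print(len(STOPWORDS))        # size of gensim's built-in stop word set
print('cool' in STOPWORDS)   # membership checks are O(1) on the frozenset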
import spacy
from nltk.tokenize import word_tokenize

# loading the English language model of spaCy
en_model = spacy.load('en_core_web_sm')
# getting the set of default stop words in the spaCy English model
stopwords = en_model.Defaults.stop_words

sample_text = "Oh man, this is pretty cool. We will do more such things."
text_tokens = word_tokenize(sample_text)
tokens_without_sw = [word for word in text_tokens if word not in stopwords]
print(tokens_without_sw)
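Since spaCy ships its own tokenizer, the same filtering can be done without NLTK by checking the is_stop flag on each token; a minimal sketch using the model loaded above:

doc = en_model(sample_text)
tokens_without_sw = [token.text for token in doc if not token.is_stop]
print(tokens_without_sw)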
# importing stop words from the NLTK library
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download('stopwords')
nltk.download('punkt')

print(stopwords.words('english'))

# random sentence with a lot of stop words
sample_text = "Oh man, this is pretty cool. We will do more such things."
text_tokens = word_tokenize(sample_text)
tokens_without_sw = [word for word in text_tokens if word not in stopwords.words('english')]
print(tokens_without_sw)
# forming new sentences for testing, feel free to experiment
# sentence 1 is a bit sarcastic, whereas sentence 2 is a general statement
new_sentence = [
    "granny starting to fear spider in the garden might be real",
    "game of thrones season finale showing this sunday night"]

# converting the sentences to sequences using the tokenizer
new_sequences = tokenizer.texts_to_sequences(new_sentence)

# padding the new sequences so they all have the same dimensions
# (assuming post-padding and post-truncation, matching the training setup)
new_padded = pad_sequences(new_sequences, maxlen = max_length,
                           padding = 'post', truncating = 'post')
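With the inputs padded to the training shape, the trained model can score them. A minimal sketch, assuming model is the network trained earlier; the sigmoid output is a probability, with values near 1 indicating sarcasm:

predictions = model.predict(new_padded)
for sentence, score in zip(new_sentence, predictions):
    print(f"{score[0]:.3f}  {sentence}")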
import tensorflow as tf

embedding_dim = 16

# creating a model for sentiment analysis
model = tf.keras.Sequential([
    # adding an Embedding layer for the neural network to learn word vectors
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length = max_length),
    # Global Average Pooling is similar to adding up the vectors in this case
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation = 'relu'),
    tf.keras.layers.Dense(1, activation = 'sigmoid')
])
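Before training, the model still needs a loss and an optimizer; a minimal sketch, assuming the usual choices for a binary (sarcastic / not sarcastic) target:

model.compile(loss = 'binary_crossentropy',
              optimizer = 'adam',
              metrics = ['accuracy'])
model.summary()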