GitHub Gists: sai-teja-ponugoti
sai-teja-ponugoti / beautiful_soup_example.ipynb (created July 3, 2020; notebook failed to render)
sai-teja-ponugoti / data_augmentation_using_nlpaug.ipynb (last active July 3, 2020; notebook failed to render)
sai-teja-ponugoti / extracting_text_from_images_tesseract.ipynb (last active July 3, 2020; notebook failed to render)
# importing the NLTK library stop words
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')
stopwords_default = stopwords.words('english')
print(len(stopwords_default))
# extending the default list with custom stop words
stopwords_default.extend(['like', 'marvel', 'ghost'])
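To see the customized list in action, here is a minimal usage sketch (the sample sentence and the lowercasing step are assumptions, not part of the gist):

from nltk.tokenize import word_tokenize

nltk.download('punkt')
sample_text = "I like Marvel movies and the Ghost Rider comics."
tokens = word_tokenize(sample_text.lower())
# keeping only tokens that are absent from the customized stop word list
print([word for word in tokens if word not in stopwords_default])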
from gensim.parsing.preprocessing import remove_stopwords
from nltk.tokenize import word_tokenize

sample_text = "Oh man, this is pretty cool. We will do more such things."
# gensim removes stop words directly from a raw string
sample_text_NSW = remove_stopwords(sample_text)
print(word_tokenize(sample_text))
print(word_tokenize(sample_text_NSW))
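Unlike the NLTK approach, gensim's remove_stopwords takes a whole string and returns a string, so the tokenization above is only needed to compare the two results side by side.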
import spacy
from nltk.tokenize import word_tokenize

# loading the English language model of spaCy
en_model = spacy.load('en_core_web_sm')
# getting the set of default stop words in the spaCy English model
stopwords = en_model.Defaults.stop_words

sample_text = "Oh man, this is pretty cool. We will do more such things."
text_tokens = word_tokenize(sample_text)
tokens_without_sw = [word for word in text_tokens if word not in stopwords]
print(tokens_without_sw)
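The same filtering also works without NLTK by letting spaCy tokenize the text itself; a short sketch using spaCy's built-in is_stop flag:

# spaCy tokens carry an is_stop flag, so no external stop word list is needed
doc = en_model(sample_text)
print([token.text for token in doc if not token.is_stop])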
# importing the NLTK library stop words
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download('stopwords')
nltk.download('punkt')
print(stopwords.words('english'))
# random sentence with a lot of stop words
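The gist presumably went on to filter such a sentence; a minimal sketch, reusing the sample sentence from the earlier snippets (an assumption):

sample_text = "Oh man, this is pretty cool. We will do more such things."
stop_words = set(stopwords.words('english'))
text_tokens = word_tokenize(sample_text)
# dropping every token that appears in NLTK's default stop word list
print([word for word in text_tokens if word not in stop_words])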
# forming new sentences for testing, feel free to experiment
# sentence 1 is a bit sarcastic, whereas sentence 2 is a general statement
new_sentence = [
    "granny starting to fear spider in the garden might be real",
    "game of thrones season finale showing this sunday night"]
# converting the sentences to sequences using the fitted tokenizer
new_sequences = tokenizer.texts_to_sequences(new_sentence)
# padding the new sequences so they all have the same dimensions
new_padded = pad_sequences(new_sequences, maxlen=max_length,
                           padding='post', truncating='post')  # post-padding assumed, to match the usual training setup
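This snippet relies on tokenizer, max_length, and pad_sequences being defined earlier in the gist; a minimal sketch of that setup, where the vocabulary size, OOV token, maximum length, and the training_sentences corpus are all assumed names and values:

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

vocab_size = 10000  # assumed vocabulary size
max_length = 100    # assumed maximum sequence length
oov_tok = "<OOV>"   # placeholder token for out-of-vocabulary words

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)  # training_sentences: hypothetical training corpus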
import tensorflow as tf

embedding_dim = 16

# creating a model for sentiment analysis
model = tf.keras.Sequential([
    # adding an Embedding layer for the neural network to learn the word vectors
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    # global average pooling is similar to adding up the vectors in this case
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    # single sigmoid unit for binary classification
    tf.keras.layers.Dense(1, activation='sigmoid')
])
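To round the snippet off, a hedged sketch of compiling the model and scoring the padded test sentences from above (the optimizer and loss are assumptions; the training step is not shown in the gist):

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# after training with model.fit(...), outputs near 1 suggest sarcasm
predictions = model.predict(new_padded)
print(predictions)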