Skip to content

Instantly share code, notes, and snippets.

@swati210994
swati210994 / preprocess_packages.py
Last active October 24, 2020 14:01
Preprocessing packages
import spacy
import nltk
import re
from nltk.corpus import stopwords
import unicodedata
import string
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@swati210994
swati210994 / stopwords_removal.ipynb
Created September 14, 2020 16:47
Removing stopwords
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Sample sentence used by the preprocessing demos in this script.
string_inp='Hey, @all do youuuuu want to learn Natural Language Processinggg 100% ??'

# English stopwords from NLTK (the 'stopwords' corpus must be available locally).
stopwords_list=stopwords.words('english')
# Set copy gives O(1) membership tests instead of scanning the list for every
# token; the list itself is kept in case other code relies on it.
stopwords_set = set(stopwords_list)

# Keep whitespace-separated tokens that are longer than two characters and are
# not stopwords. The NLTK English entries are lowercase, so each token is
# lowercased for the comparison only -- otherwise capitalised stopwords such as
# "The" or "Do" would slip through. The kept tokens retain their original case.
string_no_stopwords = " ".join(
    word
    for word in string_inp.split()
    if len(word) > 2 and word.lower() not in stopwords_set
)
print('String with stopwords: {} \nString without stopwords and short words: {}'.format(string_inp,string_no_stopwords))
# Approach 1: regex -- delete every character that is neither a word character
# (letters, digits, underscore) nor whitespace.
string_no_punct = re.sub(r'[^\w\s]', '', string_inp)
print(
    'String input:\n{}\n\nString input with no punctuations:\n{}'.format(
        string_inp, string_no_punct
    )
)

# Approach 2: string module -- delete every character listed in
# string.punctuation in a single pass via a deletion translation table.
string_no_punct_string = string_inp.translate(
    str.maketrans('', '', string.punctuation)
)
print(
    'String input:\n{}\n\nString input with no punctuations using string:\n{}'.format(
        string_inp, string_no_punct_string
    )
)

# Strip the ASCII digits 0-9 from the input.
string_no_num = re.sub(r'[0-9]', '', string_inp)