rohit-gupta · October 17, 2017 09:12
diff --git a/demo.py b/demo.py
 from stopwords import remove_stopwords

 # Sample sentence used to demonstrate stop-word removal.
 dummy_string = ("Mr. and Mrs. Dursley, of number four, Privet Drive, were "
                 "proud to say that they were perfectly normal, thank you "
                 "very much. They were the last people you'd expect to be "
                 "involved in anything strange or mysterious, because they "
                 "just didn't hold with such nonsense.")

 # Naive word tokenizer: commas and full stops become spaces, then the text
 # is split on single spaces. Runs of spaces therefore yield empty-string
 # tokens in dummy_list.
 dummy_list = dummy_string.replace(",", " ").replace(".", " ").split(" ")

 # Remove stop words, keeping the remaining tokens in their original order.
 non_stop_words = remove_stopwords(dummy_list)

 # Display texts. A single pre-formatted argument is passed to print() so the
 # output is the same whether print is a statement (Python 2) or a function
 # (Python 3); the bare ``print "label", value`` form fails on Python 3.
 print("Original: %s" % dummy_string)
 print("Non-Stopwords: %s" % (non_stop_words,))
diff --git a/stopwords.py b/stopwords.py
 # Lower-case tokens that remove_stopwords() filters out. The list also holds
 # the empty string and a single space, so the empty tokens produced by a
 # ``split(" ")`` style tokenizer are discarded as well.
 # NOTE(review): 'bus' looks out of place in a stop-word list -- confirm
 # whether it is intentional before removing it.
 stop_words = ['all', 'over', 'with', 'had', 'to', 'has', 'do', 'very', 'not',
               'this', 'some', 'are', 'out', 'for', 'be', 'we', 'by', 'on',
               'about', 'of', 'or', 'your', 'from', 'there', 'been', 'was',
               'that', 'but', 'he', 'me', 'will', 'my', 'and', 'is', 'it', 'an',
               'as', 'at', 'have', 'in', 'no', 'which', 'you', 'a', 'i', 'the',
               '', 'just', 'being', 'both', 'through', 'during', 'its',
               'before', ' ', 'how', 'should', 'only', 'under', 'ours', 'them',
               'his', 'get', 'stop', 'they', 'yourselves', 'now', 'him', 'nor',
               'did', 'she', 'each', 'further', 'where', 'few', 'because',
               'doing', 'theirs', 'up', 'our', 'ourselves', 'what', 'below',
               'does', 'above', 'between', 't', 'after', 'here', 'hers', 'her',
               'against', 's', 'own', 'into', 'yourself', 'down', 'would', '&',
               'their', 'too', 'then', 'themselves', 'until', 'more', 'himself',
               'bus', 'don', 'herself', 'than', 'those', 'myself', 'these', 'whom',
               'while', 'can', 'were', 'give', 'am', 'itself', 'any', 'if',
               'again', 'when', 'same', 'also', 'other', 'take', 'may', 'who',
               'most', 'such', 'why', 'off', 'having', 'so', 'yours', 'once']


 def remove_stopwords(raw_words_list):
     """Return the words of *raw_words_list* that are not stop words.

     Each word is lower-cased before it is compared against ``stop_words``,
     so matching is case-insensitive. The surviving words are returned
     unchanged (original casing) and in their original order.

     Args:
         raw_words_list: Iterable of string tokens.

     Returns:
         A new list with every token whose lower-cased form appears in
         ``stop_words`` removed.
     """
     # Build the lookup set once per call: each membership test becomes a hash
     # lookup instead of a scan of the whole list, while changes made to the
     # module-level ``stop_words`` list are still honoured.
     stop_word_set = set(stop_words)
     return [word for word in raw_words_list
             if word.lower() not in stop_word_set]


 if __name__ == '__main__':
     # Small demonstration that runs only when the module is executed
     # directly, not when it is imported.
     dummy_string = ("Mr. and Mrs. Dursley, of number four, Privet Drive, "
                     "were proud to say that they were perfectly normal, "
                     "thank you "
                     "very much. They were the last people you'd expect to be "
                     "involved in anything strange or mysterious, because they "
                     "just didn't hold with such nonsense.")
     # Simple word tokenizer: punctuation becomes spaces, then split on single
     # spaces (runs of spaces yield empty-string tokens).
     dummy_list = dummy_string.replace(",", " ").replace(".", " ").split(" ")
     # Remove stop words
     non_stop_words = remove_stopwords(dummy_list)
     # Display texts. One pre-formatted argument is passed to print() so the
     # output is identical under Python 2 (print statement) and Python 3
     # (print function); the bare ``print "label", value`` form is a syntax
     # error on Python 3.
     print("Original text: %s" % dummy_string)
     print("Words which are not stop words: %s" % (non_stop_words,))
	from stopwords import remove_stopwords

	# Sample sentence used to demonstrate stop-word removal.
	dummy_string = ("Mr. and Mrs. Dursley, of number four, Privet Drive, were "
	                "proud to say that they were perfectly normal, thank you "
	                "very much. They were the last people you'd expect to be "
	                "involved in anything strange or mysterious, because they "
	                "just didn't hold with such nonsense.")

	# Naive word tokenizer: commas and full stops become spaces, then the text
	# is split on single spaces. Runs of spaces therefore yield empty-string
	# tokens in dummy_list.
	dummy_list = dummy_string.replace(",", " ").replace(".", " ").split(" ")

	# Remove stop words, keeping the remaining tokens in their original order.
	non_stop_words = remove_stopwords(dummy_list)

	# Display texts. A single pre-formatted argument is passed to print() so the
	# output is the same whether print is a statement (Python 2) or a function
	# (Python 3); the bare ``print "label", value`` form fails on Python 3.
	print("Original: %s" % dummy_string)
	print("Non-Stopwords: %s" % (non_stop_words,))
	# Lower-case tokens that remove_stopwords() filters out. The list also holds
	# the empty string and a single space, so the empty tokens produced by a
	# ``split(" ")`` style tokenizer are discarded as well.
	# NOTE(review): 'bus' looks out of place in a stop-word list -- confirm
	# whether it is intentional before removing it.
	stop_words = ['all', 'over', 'with', 'had', 'to', 'has', 'do', 'very', 'not',
	              'this', 'some', 'are', 'out', 'for', 'be', 'we', 'by', 'on',
	              'about', 'of', 'or', 'your', 'from', 'there', 'been', 'was',
	              'that', 'but', 'he', 'me', 'will', 'my', 'and', 'is', 'it', 'an',
	              'as', 'at', 'have', 'in', 'no', 'which', 'you', 'a', 'i', 'the',
	              '', 'just', 'being', 'both', 'through', 'during', 'its',
	              'before', ' ', 'how', 'should', 'only', 'under', 'ours', 'them',
	              'his', 'get', 'stop', 'they', 'yourselves', 'now', 'him', 'nor',
	              'did', 'she', 'each', 'further', 'where', 'few', 'because',
	              'doing', 'theirs', 'up', 'our', 'ourselves', 'what', 'below',
	              'does', 'above', 'between', 't', 'after', 'here', 'hers', 'her',
	              'against', 's', 'own', 'into', 'yourself', 'down', 'would', '&',
	              'their', 'too', 'then', 'themselves', 'until', 'more', 'himself',
	              'bus', 'don', 'herself', 'than', 'those', 'myself', 'these', 'whom',
	              'while', 'can', 'were', 'give', 'am', 'itself', 'any', 'if',
	              'again', 'when', 'same', 'also', 'other', 'take', 'may', 'who',
	              'most', 'such', 'why', 'off', 'having', 'so', 'yours', 'once']


	def remove_stopwords(raw_words_list):
	    """Return the words of *raw_words_list* that are not stop words.

	    Each word is lower-cased before it is compared against ``stop_words``,
	    so matching is case-insensitive. The surviving words are returned
	    unchanged (original casing) and in their original order.

	    Args:
	        raw_words_list: Iterable of string tokens.

	    Returns:
	        A new list with every token whose lower-cased form appears in
	        ``stop_words`` removed.
	    """
	    # Build the lookup set once per call: each membership test becomes a hash
	    # lookup instead of a scan of the whole list, while changes made to the
	    # module-level ``stop_words`` list are still honoured.
	    stop_word_set = set(stop_words)
	    return [word for word in raw_words_list
	            if word.lower() not in stop_word_set]


	if __name__ == '__main__':
	    # Small demonstration that runs only when the module is executed
	    # directly, not when it is imported.
	    dummy_string = ("Mr. and Mrs. Dursley, of number four, Privet Drive, "
	                    "were proud to say that they were perfectly normal, "
	                    "thank you "
	                    "very much. They were the last people you'd expect to be "
	                    "involved in anything strange or mysterious, because they "
	                    "just didn't hold with such nonsense.")
	    # Simple word tokenizer: punctuation becomes spaces, then split on single
	    # spaces (runs of spaces yield empty-string tokens).
	    dummy_list = dummy_string.replace(",", " ").replace(".", " ").split(" ")
	    # Remove stop words
	    non_stop_words = remove_stopwords(dummy_list)
	    # Display texts. One pre-formatted argument is passed to print() so the
	    # output is identical under Python 2 (print statement) and Python 3
	    # (print function); the bare ``print "label", value`` form is a syntax
	    # error on Python 3.
	    print("Original text: %s" % dummy_string)
	    print("Words which are not stop words: %s" % (non_stop_words,))