nithyadurai87 · March 6, 2025 22:07
diff --git a/02_bag_of_words.py b/02_bag_of_words.py
 # Bag-of-words demo: split a paragraph into sentences and count how often
 # each vocabulary word occurs in each sentence with CountVectorizer.
 import nltk

 # Fetch NLTK resources. NOTE(review): nothing in the code visible here calls
 # into NLTK after these downloads; they are kept so the script's existing
 # side effects are unchanged -- confirm whether they are still needed.
 nltk.download('stopwords')
 nltk.download('punkt')
 nltk.download('punkt_tab')
 from sklearn.feature_extraction.text import CountVectorizer

 paragraph = "Periyar was a social reformer in Tamil Nadu. He founded the Self-Respect Movement. This movement aimed to promote equality and end caste discrimination. Today, he is celebrated as a key figure in the fight for social justice and equality in Tamil Nadu."

 # One document per sentence. Splitting on '.' leaves an empty string after
 # the final period (and leading spaces on later sentences); strip each piece
 # and drop the empty ones so no blank document -- and therefore no all-zero
 # row -- ends up in the count matrix.
 x = [s.strip() for s in paragraph.split('.') if s.strip()]

 # Learn the vocabulary and build the document-term count matrix
 # (one row per sentence, one column per vocabulary word).
 tokens = CountVectorizer()
 vectors = tokens.fit_transform(x)

 print(tokens.vocabulary_)   # mapping: word -> column index
 print(vectors.toarray())    # dense view of the sparse count matrix

 # Alternative vectorizer configurations to experiment with:
 #   drop scikit-learn's built-in English stop words
 # tokens = CountVectorizer(stop_words='english')
 #   count bigrams only (pairs of adjacent words), after stop-word removal
 # tokens = CountVectorizer(ngram_range = (2, 2), stop_words='english')
	# Bag-of-words demo: split a paragraph into sentences and count how often
	# each vocabulary word occurs in each sentence with CountVectorizer.
	import nltk

	# Fetch NLTK resources. NOTE(review): nothing in the code visible here calls
	# into NLTK after these downloads; they are kept so the script's existing
	# side effects are unchanged -- confirm whether they are still needed.
	nltk.download('stopwords')
	nltk.download('punkt')
	nltk.download('punkt_tab')
	from sklearn.feature_extraction.text import CountVectorizer

	paragraph = "Periyar was a social reformer in Tamil Nadu. He founded the Self-Respect Movement. This movement aimed to promote equality and end caste discrimination. Today, he is celebrated as a key figure in the fight for social justice and equality in Tamil Nadu."

	# One document per sentence. Splitting on '.' leaves an empty string after
	# the final period (and leading spaces on later sentences); strip each piece
	# and drop the empty ones so no blank document -- and therefore no all-zero
	# row -- ends up in the count matrix.
	x = [s.strip() for s in paragraph.split('.') if s.strip()]

	# Learn the vocabulary and build the document-term count matrix
	# (one row per sentence, one column per vocabulary word).
	tokens = CountVectorizer()
	vectors = tokens.fit_transform(x)

	print(tokens.vocabulary_)   # mapping: word -> column index
	print(vectors.toarray())    # dense view of the sparse count matrix

	# Alternative vectorizer configurations to experiment with:
	#   drop scikit-learn's built-in English stop words
	# tokens = CountVectorizer(stop_words='english')
	#   count bigrams only (pairs of adjacent words), after stop-word removal
	# tokens = CountVectorizer(ngram_range = (2, 2), stop_words='english')