NaelsonDouglas · February 4, 2019 03:36
diff --git a/tf-idf.py b/tf-idf.py
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer


 # Learn a bag-of-words vocabulary from the text file. Iterating the file
 # handle yields one line at a time, so each line is treated as a document.
 # The stop words are passed as a list: that is the documented type, and
 # recent scikit-learn releases reject other containers such as frozenset.
 custom_stop_words = ["palavra1", "palavra2", "palavra3"]
 vectorizer = CountVectorizer(stop_words=custom_stop_words)
 # The context manager closes the file as soon as fitting is done.
 with open('./texto_aleatorio.txt') as data:
     vectorizer.fit(data)
 # Show the learned term -> column-index mapping.
 print(vectorizer.vocabulary_)



 # Fit a TF-IDF model on the same file, again one document per line.
 # The stop words are passed as a list: that is the documented type, and
 # recent scikit-learn releases reject other containers such as frozenset.
 custom_stop_words = ["palavra1", "palavra2", "palavra3"]

 vectorizer = TfidfVectorizer(stop_words=custom_stop_words)
 # The context manager closes the file as soon as fitting is done.
 with open('./texto_aleatorio.txt') as data:
     vectorizer.fit(data)
 # Show the learned term -> column-index mapping and the per-term IDF weights.
 print(vectorizer.vocabulary_)
 print(vectorizer.idf_)

 # Encode the whole file as ONE document (a single-element list) using the
 # vectorizer fitted above; the context manager closes the file afterwards.
 with open('./texto_aleatorio.txt') as data:
     vector = vectorizer.transform([data.read()])
 # transform() returns a sparse matrix; print its shape, then the dense values.
 print(vector.shape)
 print('\n')
 print(vector.toarray())
	from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer


	# Learn a bag-of-words vocabulary from the text file. Iterating the file
	# handle yields one line at a time, so each line is treated as a document.
	# The stop words are passed as a list: that is the documented type, and
	# recent scikit-learn releases reject other containers such as frozenset.
	custom_stop_words = ["palavra1", "palavra2", "palavra3"]
	vectorizer = CountVectorizer(stop_words=custom_stop_words)
	# The context manager closes the file as soon as fitting is done.
	with open('./texto_aleatorio.txt') as data:
	    vectorizer.fit(data)
	# Show the learned term -> column-index mapping.
	print(vectorizer.vocabulary_)



	# Fit a TF-IDF model on the same file, again one document per line.
	# The stop words are passed as a list: that is the documented type, and
	# recent scikit-learn releases reject other containers such as frozenset.
	custom_stop_words = ["palavra1", "palavra2", "palavra3"]

	vectorizer = TfidfVectorizer(stop_words=custom_stop_words)
	# The context manager closes the file as soon as fitting is done.
	with open('./texto_aleatorio.txt') as data:
	    vectorizer.fit(data)
	# Show the learned term -> column-index mapping and the per-term IDF weights.
	print(vectorizer.vocabulary_)
	print(vectorizer.idf_)

	# Encode the whole file as ONE document (a single-element list) using the
	# vectorizer fitted above; the context manager closes the file afterwards.
	with open('./texto_aleatorio.txt') as data:
	    vector = vectorizer.transform([data.read()])
	# transform() returns a sparse matrix; print its shape, then the dense values.
	print(vector.shape)
	print('\n')
	print(vector.toarray())