GeorgeSeif · December 28, 2019 16:48
diff --git a/scikit_learn_1.py b/scikit_learn_1.py
 import pandas as pd
 from sklearn.feature_extraction.text import TfidfVectorizer

 def get_tf_idf(vectorizer, vectors=None):
     """Return the TF-IDF weights as a pandas DataFrame.

     Args:
         vectorizer: A fitted vectorizer. Its ``get_feature_names_out()``
             method is used when present (scikit-learn >= 1.0); otherwise
             the older ``get_feature_names()`` is called.
         vectors: Matrix produced by fitting/transforming the corpus; it
             must provide ``todense()``. When omitted, the module-level
             ``vectors`` variable is used, which is what this function
             always read before the parameter existed.

     Returns:
         A DataFrame with one column per feature name and one row per row
         of ``vectors``.

     Raises:
         NameError: If ``vectors`` is not passed and no module-level
             ``vectors`` exists.
     """
     if vectors is None:
         # Backward compatibility: older callers pass only the vectorizer
         # and rely on a global named ``vectors``.
         try:
             vectors = globals()["vectors"]
         except KeyError:
             raise NameError("name 'vectors' is not defined") from None

     # get_feature_names() was removed in scikit-learn 1.2; prefer the
     # replacement but keep working with old releases.
     names_getter = getattr(vectorizer, "get_feature_names_out", None)
     if names_getter is None:
         names_getter = vectorizer.get_feature_names
     feature_names = list(names_getter())

     dense_rows = vectors.todense().tolist()
     return pd.DataFrame(dense_rows, columns=feature_names)
  

 # Sample corpus: three short sentences describing TF-IDF.
 doc_1 = ("TF-IDF uses statistics to measure how important a word is to "
          "a particular document")
 doc_2 = ("The TF-IDF is perfectly balanced, considering both local and global "
          "levels of statistics for the target word.")
 doc_3 = ("Words that occur more frequently in a document are weighted higher, "
          "but only if they're more rare within the whole document.")
 documents_list = [doc_1, doc_2, doc_3]

 # Fit the vectorizer on the corpus and keep the resulting matrix.
 # NOTE: `vectors` has to remain a module-level name because get_tf_idf
 # reads it as a global when called with only the vectorizer.
 vectorizer = TfidfVectorizer()
 vectors = vectorizer.fit_transform(documents_list)

 tfidf_data = get_tf_idf(vectorizer)

 # Output the TF-IDF table covering every word in every document.
 print(tfidf_data)
	import pandas as pd
	from sklearn.feature_extraction.text import TfidfVectorizer

	def get_tf_idf(vectorizer, vectors=None):
	    """Return the TF-IDF weights as a pandas DataFrame.

	    Args:
	        vectorizer: A fitted vectorizer. Its ``get_feature_names_out()``
	            method is used when present (scikit-learn >= 1.0); otherwise
	            the older ``get_feature_names()`` is called.
	        vectors: Matrix produced by fitting/transforming the corpus; it
	            must provide ``todense()``. When omitted, the module-level
	            ``vectors`` variable is used, which is what this function
	            always read before the parameter existed.

	    Returns:
	        A DataFrame with one column per feature name and one row per row
	        of ``vectors``.

	    Raises:
	        NameError: If ``vectors`` is not passed and no module-level
	            ``vectors`` exists.
	    """
	    if vectors is None:
	        # Backward compatibility: older callers pass only the vectorizer
	        # and rely on a global named ``vectors``.
	        try:
	            vectors = globals()["vectors"]
	        except KeyError:
	            raise NameError("name 'vectors' is not defined") from None

	    # get_feature_names() was removed in scikit-learn 1.2; prefer the
	    # replacement but keep working with old releases.
	    names_getter = getattr(vectorizer, "get_feature_names_out", None)
	    if names_getter is None:
	        names_getter = vectorizer.get_feature_names
	    feature_names = list(names_getter())

	    dense_rows = vectors.todense().tolist()
	    return pd.DataFrame(dense_rows, columns=feature_names)


	# Sample corpus: three short sentences describing TF-IDF.
	doc_1 = ("TF-IDF uses statistics to measure how important a word is to "
	         "a particular document")
	doc_2 = ("The TF-IDF is perfectly balanced, considering both local and global "
	         "levels of statistics for the target word.")
	doc_3 = ("Words that occur more frequently in a document are weighted higher, "
	         "but only if they're more rare within the whole document.")
	documents_list = [doc_1, doc_2, doc_3]

	# Fit the vectorizer on the corpus and keep the resulting matrix.
	# NOTE: `vectors` has to remain a module-level name because get_tf_idf
	# reads it as a global when called with only the vectorizer.
	vectorizer = TfidfVectorizer()
	vectors = vectorizer.fit_transform(documents_list)

	tfidf_data = get_tf_idf(vectorizer)

	# Output the TF-IDF table covering every word in every document.
	print(tfidf_data)