dayyass · September 29, 2021 12:25
diff --git a/tfidf_token2idf.py b/tfidf_token2idf.py
 from sklearn.feature_extraction.text import TfidfVectorizer

 # Example corpus: four short sentences that share most of their words.
 corpus = [
     "This is the first document.",
     "This document is the second document.",
     "And this is the third one.",
     "Is this the first document?",
 ]

 # Fit a default-configured vectorizer on the corpus.
 tfidf_vectorizer = TfidfVectorizer()
 tfidf_vectorizer.fit(corpus)

 # Build the token -> IDF lookup: each value stored in vocabulary_ is used
 # as the position of that token's weight inside idf_.
 token2idf = {
     term: tfidf_vectorizer.idf_[column]
     for term, column in tfidf_vectorizer.vocabulary_.items()
 }

 # List of (token, idf) pairs ordered by ascending IDF value.
 sorted_token2idf = sorted(token2idf.items(), key=lambda pair: pair[1])
	from sklearn.feature_extraction.text import TfidfVectorizer

	# Example corpus: four short sentences that share most of their words.
	corpus = [
		"This is the first document.",
		"This document is the second document.",
		"And this is the third one.",
		"Is this the first document?",
	]

	# Fit a default-configured vectorizer on the corpus.
	tfidf_vectorizer = TfidfVectorizer()
	tfidf_vectorizer.fit(corpus)

	# Build the token -> IDF lookup: each value stored in vocabulary_ is used
	# as the position of that token's weight inside idf_.
	token2idf = {
		term: tfidf_vectorizer.idf_[column]
		for term, column in tfidf_vectorizer.vocabulary_.items()
	}

	# List of (token, idf) pairs ordered by ascending IDF value.
	sorted_token2idf = sorted(token2idf.items(), key=lambda pair: pair[1])