Keiku · April 11, 2017 07:40
diff --git a/extract_tfidf_vector.py b/extract_tfidf_vector.py
 # Minimal TF-IDF walkthrough: fit a TfidfVectorizer on four short documents
 # and inspect the learned vocabulary, the document-term matrix and the
 # per-term IDF weights.
 from sklearn.feature_extraction.text import TfidfVectorizer

 text = [
     'This is a string',
     'This is another string',
     'TFIDF computation calculation',
     'TfIDF is the product of TF and IDF',
 ]

 # max_df=1.0 / min_df=1 keep every term regardless of document frequency;
 # stop_words='english' drops words such as 'this', 'is', 'the';
 # norm=None disables row normalisation, so each cell is the raw tf * idf.
 vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', norm=None)

 X = vectorizer.fit_transform(text)

 # get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2.
 # Prefer get_feature_names_out() when available and convert its ndarray to a
 # plain list so X_vovab has the same type on old and new releases.
 # (The name X_vovab [sic] is kept unchanged for existing users of the script.)
 X_vovab = (
     vectorizer.get_feature_names_out().tolist()
     if hasattr(vectorizer, 'get_feature_names_out')
     else vectorizer.get_feature_names()
 )
 # Out[1]: ['calculation', 'computation', 'idf', 'product', 'string', 'tf', 'tfidf']

 # Dense view of the sparse result: one row per document, one column per
 # vocabulary term, in the order listed in X_vovab.
 X_mat = X.todense()
 # Out[2]:
 # matrix([[ 0.        ,  0.        ,  0.        ,  0.        ,  1.51082562,
 #           0.        ,  0.        ],
 #         [ 0.        ,  0.        ,  0.        ,  0.        ,  1.51082562,
 #           0.        ,  0.        ],
 #         [ 1.91629073,  1.91629073,  0.        ,  0.        ,  0.        ,
 #           0.        ,  1.51082562],
 #         [ 0.        ,  0.        ,  1.91629073,  1.91629073,  0.        ,
 #           1.91629073,  1.51082562]])

 # Smoothed IDF per term: ln((1 + n_docs) / (1 + df)) + 1 with n_docs = 4,
 # i.e. ln(5/2) + 1 = 1.9163 for df = 1 and ln(5/3) + 1 = 1.5108 for df = 2.
 X_idf = vectorizer.idf_
 # Out[3]:
 # array([ 1.91629073,  1.91629073,  1.91629073,  1.91629073,  1.51082562,
 #         1.91629073,  1.51082562])
	# Minimal TF-IDF walkthrough: fit a TfidfVectorizer on four short documents
	# and inspect the learned vocabulary, the document-term matrix and the
	# per-term IDF weights.
	from sklearn.feature_extraction.text import TfidfVectorizer

	text = [
		'This is a string',
		'This is another string',
		'TFIDF computation calculation',
		'TfIDF is the product of TF and IDF',
	]

	# max_df=1.0 / min_df=1 keep every term regardless of document frequency;
	# stop_words='english' drops words such as 'this', 'is', 'the';
	# norm=None disables row normalisation, so each cell is the raw tf * idf.
	vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', norm=None)

	X = vectorizer.fit_transform(text)

	# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2.
	# Prefer get_feature_names_out() when available and convert its ndarray to a
	# plain list so X_vovab has the same type on old and new releases.
	# (The name X_vovab [sic] is kept unchanged for existing users of the script.)
	X_vovab = (
		vectorizer.get_feature_names_out().tolist()
		if hasattr(vectorizer, 'get_feature_names_out')
		else vectorizer.get_feature_names()
	)
	# Out[1]: ['calculation', 'computation', 'idf', 'product', 'string', 'tf', 'tfidf']

	# Dense view of the sparse result: one row per document, one column per
	# vocabulary term, in the order listed in X_vovab.
	X_mat = X.todense()
	# Out[2]:
	# matrix([[ 0.        ,  0.        ,  0.        ,  0.        ,  1.51082562,
	#           0.        ,  0.        ],
	#         [ 0.        ,  0.        ,  0.        ,  0.        ,  1.51082562,
	#           0.        ,  0.        ],
	#         [ 1.91629073,  1.91629073,  0.        ,  0.        ,  0.        ,
	#           0.        ,  1.51082562],
	#         [ 0.        ,  0.        ,  1.91629073,  1.91629073,  0.        ,
	#           1.91629073,  1.51082562]])

	# Smoothed IDF per term: ln((1 + n_docs) / (1 + df)) + 1 with n_docs = 4,
	# i.e. ln(5/2) + 1 = 1.9163 for df = 1 and ln(5/3) + 1 = 1.5108 for df = 2.
	X_idf = vectorizer.idf_
	# Out[3]:
	# array([ 1.91629073,  1.91629073,  1.91629073,  1.91629073,  1.51082562,
	#         1.91629073,  1.51082562])