martinthenext · January 25, 2019 15:42
diff --git a/how-to-make-qa-bot-4.py b/how-to-make-qa-bot-4.py
 from sklearn.feature_extraction.text import TfidfVectorizer

 # Flatten the SQuAD dump into one list of naive "sentences": every
 # paragraph context of every article is cut on the literal '. ' separator.
 sentences = [
     piece
     for article in squad['data']
     for para in article['paragraphs']
     for piece in para['context'].split('. ')
 ]

 # Learn the TF-IDF vocabulary and weights from the whole corpus.  fit()
 # hands back the vectorizer itself, so construction and fitting are chained.
 tf_idf_vect = TfidfVectorizer(tokenizer=tokenize_and_stem).fit(sentences)

 # The question is vectorised as a one-document batch.
 question_repr = tf_idf_vect.transform([question])

 # The context is split exactly like the corpus, one matrix row per piece.
 context_sentences = context.split('. ')
 sentence_repr = tf_idf_vect.transform(context_sentences)

 # Bare expression: shows the matrix when run in a notebook/REPL cell.
 sentence_repr

 # The string below appears to record the output displayed by one such run.
 '''<4x76188 sparse matrix of type '<class 'numpy.float64'>'
 	with 71 stored elements in Compressed Sparse Row format>'''
	from sklearn.feature_extraction.text import TfidfVectorizer

	# Collect every '. '-separated piece of every paragraph context in the
	# SQuAD data into a single flat list used as the fitting corpus.
	sentences = [
	    piece
	    for article in squad['data']
	    for para in article['paragraphs']
	    for piece in para['context'].split('. ')
	]

	# Construct and fit in one step; fit() returns the fitted vectorizer.
	tf_idf_vect = TfidfVectorizer(tokenizer=tokenize_and_stem).fit(sentences)

	# Vectorise the question as a single-document batch.
	question_repr = tf_idf_vect.transform([question])

	# Split the context with the same separator and vectorise each piece.
	context_sentences = context.split('. ')
	sentence_repr = tf_idf_vect.transform(context_sentences)

	# Bare expression: displays the matrix in a notebook/REPL session.
	sentence_repr

	# The string below appears to record the output shown by one such run.
	'''<4x76188 sparse matrix of type '<class 'numpy.float64'>'
	with 71 stored elements in Compressed Sparse Row format>'''