Skip to content

Instantly share code, notes, and snippets.

View Tathagatd96's full-sized avatar

Tathagat Dasgupta Tathagatd96

View GitHub Profile
# TF-IDF: re-weight the raw term counts produced earlier in the script
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
print(X_train_tfidf.shape)

# Classifier training: multinomial naive Bayes on the TF-IDF features
clf = MultinomialNB()
clf.fit(X_train_tfidf, twenty_train.target)

# Classify two unseen documents. Only transform() is called on count_vect
# and tfidf_transformer here, so neither is refitted on the new text.
docs_new = [
    'God is love',
    'OpenGL on the GPU is fast',
]
X_new_counts = count_vect.transform(docs_new)
X_new_tfidf = tfidf_transformer.transform(X_new_counts)
predicted = clf.predict(X_new_tfidf)
# Building a pipeline: raw text -> token counts -> TF-IDF -> naive Bayes
nb_steps = [
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
]
text_clf = Pipeline(nb_steps)
# The whole chain is fitted in one call on the raw training documents
text_clf.fit(twenty_train.data, twenty_train.target)
# Performance on the test set
# Load the held-out split for the same categories, shuffled with a fixed seed
twenty_test = fetch_20newsgroups(
    subset='test', categories=categories, shuffle=True, random_state=42)
doc_test = twenty_test.data
predicted = text_clf.predict(doc_test)
# print is called as a function so this runs on Python 3 as well as Python 2
print("Classifier Accuracy:")
# Accuracy = fraction of test documents whose predicted label matches the target
print(np.mean(predicted == twenty_test.target))
# SVM implementation: same vectorizer/TF-IDF front end, with a linear SVM
# (hinge loss) trained by stochastic gradient descent as the final step
text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    # n_iter was deprecated in scikit-learn 0.19 and removed in 0.21.
    # max_iter=5 with tol=None disables early stopping, so training still
    # runs exactly five passes over the data as n_iter=5 did.
    ('clf', SGDClassifier(loss='hinge', alpha=1e-3, random_state=42,
                          max_iter=5, tol=None)),
])
text_clf.fit(twenty_train.data, twenty_train.target)
predicted = text_clf.predict(doc_test)
# print is called as a function so this runs on Python 3 as well as Python 2
print("SVM Accuracy:")
# Accuracy = fraction of test documents whose predicted label matches the target
print(np.mean(predicted == twenty_test.target))
2257
From: sd345@city.ac.uk (Michael Collier)
Subject: Converting images to HP LaserJet III?
Nntp-Posting-Host: hampton
comp.graphics
[1 1 3 3 3 3 3 2 2 2]
(2257, 35788)
4690
(2257, 35788)
'God is love'=>soc.religion.christian
'OpenGL on the GPU is fast'=>comp.graphics
Classifier Accuracy:
0.834886817577