Created
January 5, 2017 07:52
-
-
Save Getmrahul/4c79e0137f950ea24b427a6271dc5f2a to your computer and use it in GitHub Desktop.
Basic Text Classification with Scikit-learn [Question/Answer Classifier]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Train a basic question/answer text classifier and persist it with joblib.

The script fits a bag-of-words -> TF-IDF -> multinomial naive Bayes pipeline
on ``example.csv``, saves the fitted pipeline to ``qa_modal.pkl``, reloads it
and prints the predicted class for a few sample sentences.

``example.csv`` is read without a header row; its two comma-separated columns
are named ``tweets`` (the text) and ``class`` (the label).
"""
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer  # noqa: F401  (unused; kept from the original)
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer

# ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and removed
# in 0.23; prefer the standalone package and fall back for old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Token counts -> TF-IDF weighting -> multinomial naive Bayes.
text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
])

# The CSV has no header row, so the column names are supplied here.
df = pd.read_csv('example.csv', header=None, sep=',', names=['tweets', 'class'])
X = df['tweets']
y = df['class']

text_clf.fit(X, y)

# Persist the fitted pipeline. The file name is kept as-is because the
# companion prediction script loads the same path.
joblib.dump(text_clf, 'qa_modal.pkl')

# Round-trip through disk to confirm the saved model is usable.
clf = joblib.load('qa_modal.pkl')
docs_new = ['who are you', 'I love drinks', 'Do you know french']
predicted = clf.predict(docs_new)

# Function-call form works on both Python 2 and Python 3.
print(predicted)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Load the persisted classifier from ``qa_modal.pkl`` and classify samples.

Expects ``qa_modal.pkl`` to exist in the working directory; prints the
predicted class for each sample sentence.
"""
# ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and removed
# in 0.23; prefer the standalone package and fall back for old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files from a trusted source.
clf = joblib.load('qa_modal.pkl')
docs_new = ['who are you', 'Do you know me', 'Do you know french']
predicted = clf.predict(docs_new)

# Function-call form works on both Python 2 and Python 3.
print(predicted)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Can you please provide example.csv?