Text Classification
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Tags currently used for labelling the sentences
my_tags = ['Social','Economic','Political','Health','Environmental','Ministry','Aim/Objective/Goal','Rural Development','Scheme','Proposed','Brief','Organization','Remedy','Defence','Foreign Relations','Science & Technology','Location','Facts','Space Science','Cultural','Future','International']

# Read the file that holds our data
df = pd.read_csv("out1.csv")

# Use only the rows for which tags are available and split them into training and testing sets
X = df['Text'][0:65]
y = df['Class'][0:65]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Build a pipeline: first create the count vectors, then apply the tf-idf transform, and finally fit the classifier
nb = Pipeline([('vect', CountVectorizer()),
               ('tfidf', TfidfTransformer()),
               ('clf', MultinomialNB()),
               ])
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

#print('accuracy %s' % accuracy_score(y_test, y_pred))
#print(classification_report(y_test, y_pred, target_names=my_tags))  # target_names must match the classes actually present
print(X_test, y_pred, y_test)
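
A minimal evaluation sketch (assuming the nb pipeline and the train/test split defined above): classification_report expects its label names to line up with the classes that actually occur in the evaluation split, so here the label list is derived from the data rather than taken from my_tags.

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report  # already imported above

# Classes actually present in this split (y_test is a Series, y_pred a numpy array)
labels = sorted(set(y_test) | set(y_pred))

print('accuracy %.3f' % accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred, labels=labels))
print(classification_report(y_test, y_pred, labels=labels, target_names=labels))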