Skip to content

Instantly share code, notes, and snippets.

@ssisaias
Created July 2, 2018 02:34
Show Gist options
  • Save ssisaias/b9521c910d66c440c9e90d21f5360536 to your computer and use it in GitHub Desktop.
Save ssisaias/b9521c910d66c440c9e90d21f5360536 to your computer and use it in GitHub Desktop.
import nltk
import csv
import pickle
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sklearn.naive_bayes import MultinomialNB
from myclassify import Analise
from pandas import DataFrame
import numpy
from sklearn.feature_extraction.text import CountVectorizer
# Here we have the validation steps and a way to load the classifier.
# Load the previously trained classifier and its fitted CountVectorizer.
# NOTE(review): pickle.load executes arbitrary code from the file — only
# load pickle files you produced yourself, never untrusted input.
with open('classifier.pickle', 'rb') as handle:
    classifier = pickle.load(handle)
with open('count_vectorizer.pickle', 'rb') as handle:
    count_vectorizer = pickle.load(handle)
# Read labeled samples from the CSV results file as (text, label) pairs.
dados = []
with open('output_results_processed.txt', 'r', encoding='utf8') as file:
    reader = csv.reader(file)
    for row in reader:
        dados.append((row[0], row[1]))

# Preprocess: remove Portuguese stopwords, then stem each token.
# `Analise` is the project-local preprocessor (myclassify module); it
# expects both stop-word attributes to be set before use.
preprocessor = Analise()
preprocessor.stop_words = set(stopwords.words("portuguese"))
preprocessor.stopwordsnltk = nltk.corpus.stopwords.words('portuguese')
classification_data = preprocessor.removerStopWords(dados)
classification_data = preprocessor.aplicastemmer(classification_data)
# Rebuild each document as one space-separated string of its
# (stopword-filtered, stemmed) tokens, and collect the labels.
# str.join replaces the original quadratic `frase += witem + " "` loop;
# the dropped trailing space is irrelevant to CountVectorizer tokenization.
x = [" ".join(item[0]) for item in classification_data]
y = [item[1] for item in classification_data]
## Classification: vectorize the held-out texts with the fitted
## vectorizer and predict their labels.
teste = x
teste_counts = count_vectorizer.transform(teste)
predictions = classifier.predict(teste_counts)

# Accuracy: percentage of predictions matching the ground-truth labels.
# The original bare expressions `predictions` / `rate` only display in a
# REPL/notebook; print the rate so the script reports it too.
markups = y
diferencas = predictions == markups
rate = 100.0 * numpy.mean(diferencas)
print(rate)

# scikit-learn metrics (use this one): per-class precision/recall/f1.
from sklearn.metrics import classification_report
target_names = ['Class 0 (neutro)', 'Class 1 (neg)']
print(classification_report(markups, predictions, target_names=target_names))
#Old method (superseded by classification_report above)
#from sklearn.metrics import recall_score
#from sklearn.metrics import precision_score
## Precision, recall
# recall
#round(100*recall_score(markups, predictions,average='macro'),3)
#round(100*recall_score(markups, predictions,average='micro'),3)
#round(100*recall_score(markups, predictions,average='weighted'),3)
#recall_score(markups, predictions,average=None)
# precision
#round(100*precision_score(markups, predictions, average='macro'),3)
#round(100*precision_score(markups, predictions,average='micro'),3)
#round(100*precision_score(markups, predictions,average='weighted'),3)
#precision_score(markups, predictions,average=None)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment