Skip to content

Instantly share code, notes, and snippets.

@ssisaias
Created July 2, 2018 02:49
Show Gist options
  • Save ssisaias/fc49e7983a244b8c29b8f069f263216a to your computer and use it in GitHub Desktop.
Save ssisaias/fc49e7983a244b8c29b8f069f263216a to your computer and use it in GitHub Desktop.
import nltk
import csv
import pickle
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
class Analise:
    """Load labelled text data and pre-process it for Portuguese text analysis.

    The data files are CSV files whose first column is the text and whose
    second column is the label (called ``emocao`` throughout this class).

    Attributes:
        stop_words: set of Portuguese stop words from the NLTK corpus.
        stopwordsnltk: the same stop words as the list returned by NLTK.
        dataPath_treino: path of the training CSV file.
        dataPath_teste: path of the test CSV file, or ``None`` if not set.
    """

    def __init__(self, dataPath_treino='output.txt', dataPath_teste=None):
        """Load the stop-word list and remember the data file paths.

        Args:
            dataPath_treino: path of the training CSV file.
            dataPath_teste: path of the test CSV file. It has no default
                location; set it here (or assign the attribute later)
                before calling ``getBaseTeste``.
        """
        # Read the corpus once and derive the set from the list, instead of
        # loading the same word list from disk twice.
        self.stopwordsnltk = nltk.corpus.stopwords.words('portuguese')
        self.stop_words = set(self.stopwordsnltk)
        self.dataPath_treino = dataPath_treino
        # Previously this attribute was never created, so getBaseTeste
        # always failed with AttributeError.
        self.dataPath_teste = dataPath_teste

    def _ler_base(self, caminho):
        """Read ``caminho`` as CSV and return a list of (text, label) tuples.

        Blank lines are skipped. A non-blank row with fewer than two columns
        raises ``IndexError``.
        """
        # newline='' lets the csv module do its own newline handling, as
        # required by the csv documentation.
        with open(caminho, 'r', encoding="utf8", newline='') as file:
            # csv.reader yields an empty list for a blank line; skip those
            # so a trailing blank line does not crash the load.
            return [(row[0], row[1]) for row in csv.reader(file) if row]

    def getBaseTeste(self):
        """Return the test data as a list of (text, label) tuples.

        Raises:
            ValueError: if no test file path has been configured.
        """
        if self.dataPath_teste is None:
            raise ValueError(
                "dataPath_teste is not set; pass it to Analise(...) or "
                "assign the attribute before calling getBaseTeste()"
            )
        return self._ler_base(self.dataPath_teste)

    def getBase(self):
        """Return the training data as a list of (text, label) tuples."""
        return self._ler_base(self.dataPath_treino)

    def removerStopWords(self, texto):
        """Drop stop words from every sentence.

        Args:
            texto: iterable of (sentence, label) pairs.

        Returns:
            A list of (sentence, label) pairs where each sentence has been
            tokenized with ``word_tokenize``, filtered, and re-joined with
            single spaces. The stop-word comparison is case-sensitive.
        """
        frases_stop = []
        for palavras, emocao in texto:
            frase = ' '.join(
                word for word in word_tokenize(palavras)
                if word not in self.stop_words
            )
            frases_stop.append((frase, emocao))
        return frases_stop

    def aplicastemmer(self, texto):
        """Stem every non-stop-word of every sentence with the RSLP stemmer.

        Args:
            texto: iterable of (sentence, label) pairs.

        Returns:
            A list of (stems, label) pairs, where ``stems`` is a list of
            strings (unlike ``removerStopWords``, which returns joined text).
            Sentences are split on whitespace, not tokenized.
        """
        stemmer = nltk.stem.RSLPStemmer()
        # Build the set once per call: membership in the raw list is a
        # linear scan for every word.
        stop = set(self.stopwordsnltk)
        frasesstemming = []
        for palavras, emocao in texto:
            comstemming = [
                str(stemmer.stem(p)) for p in palavras.split()
                if p not in stop
            ]
            frasesstemming.append((comstemming, emocao))
        return frasesstemming
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment