This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize

# Download the resources this script relies on: the 'machado' corpus and
# the 'punkt' tokenizer data.
nltk.download('machado')
nltk.download('punkt')

# Dom Casmurro corpus, loaded as one raw text string
corpus_dom_casmurro = nltk.corpus.machado.raw('romance/marm08.txt')

# Pre-processing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk

nltk.download('stopwords')

# List of Portuguese stopwords
stopwords = nltk.corpus.stopwords.words('portuguese')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk

# Corpus identifier; in this case the 'machado' id is used.
nltk_id = 'machado'

# The corpus has to be downloaded before it can be read.
nltk.download(nltk_id)
# From here on the corpus is accessible.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import confusion_matrix

# For binary labels the 2x2 confusion matrix flattens row by row into
# (true negatives, false positives, false negatives, true positives).
tn, fp, fn, tp = confusion_matrix(y_test, y_predito).ravel()

precision = tp / (tp + fp)  # fraction of predicted positives that are correct
recall = tp / (tp + fn)     # fraction of actual positives that were detected
fpr = fp / (fp + tn)        # fraction of actual negatives predicted positive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import roc_auc_score

# Area under the ROC curve.
# roc_auc_score receives the test labels (y_true) and the predicted
# values (y_scores) and returns a value in the interval [0, 1].
auc_score = roc_auc_score(y_true, y_scores)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import metrics

# The sklearn function returns a tuple of numpy arrays, in this order:
# false positive rate (FPR), true positive rate (TPR) and thresholds.
# NOTE(review): roc_curve is meant to receive scores/probabilities as its
# second argument - confirm y_predito holds scores rather than hard labels.
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_predito)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def nnTrain(epsilon, alpha, max_iter): | |
input_layer_size = x_train.shape[1] | |
hidden_layer_size = 800 | |
num_labels = 10 | |
theta_1, theta_2 = randomInit(input_layer_size, hidden_layer_size, num_labels) | |
for i in range(max_iter): | |
J_theta, Theta1_grad, Theta2_grad = nnRegCostFunction( | |
theta_1, theta_2, x_train, y_train, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def classifications(theta_1, theta_2, X):
    """Run the forward pass of a network with one hidden layer over X.

    Args:
        theta_1: weights applied to the bias-augmented input; used
            transposed on the right-hand side of the product.
        theta_2: weights applied to the bias-augmented hidden activations;
            used transposed on the right-hand side of the product.
        X: 2-D array with one sample per row.
    """
    # Prepend a column of ones (bias unit) to the inputs.
    a1 = np.append(np.ones(shape=(X.shape[0], 1)), X, axis=1)
    z2 = a1 @ theta_1.transpose()
    a2 = sigmoid(z2)
    # Prepend the bias unit to the hidden-layer activations as well.
    a2 = np.append(np.ones(shape=(a2.shape[0], 1)), a2, axis=1)
    z3 = a2 @ theta_2.transpose()
    a3 = sigmoid(z3)
    # NOTE(review): the visible excerpt ends here without a return, so the
    # output activations a3 are discarded. Confirm what callers expect
    # (raw activations or predicted labels) before adding one.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def nnRegCostFunction(theta_1, theta_2, X, y, input_layer_size, hidden_layer_size, num_labels): | |
# Variáveis úteis (m=número de imagens, n=número de pixels por imagem) | |
[m, n] = X.shape | |
# Algoritmo Feedforward | |
# Adiciona a bias unit (uma coluna de 1) ao dataset | |
a1 = np.append(np.ones(shape=(X.shape[0], 1)), X, axis=1) | |
z2 = a1 @ theta_1.transpose() | |
a2 = sigmoid(z2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def randomInit(input_layer_size, hidden_layer_size, num_labels):
    """Draw random initial weights for a network with one hidden layer.

    Each matrix has shape (units in the next layer, units in the previous
    layer + 1); the extra column holds the bias weights. Entries are
    sampled from a normal distribution with mean 0 and standard
    deviation 0.1.

    Args:
        input_layer_size: number of input features (bias excluded).
        hidden_layer_size: number of hidden units (bias excluded).
        num_labels: number of output units.

    Returns:
        Tuple (theta1_ini, theta2_ini) with shapes
        (hidden_layer_size, input_layer_size + 1) and
        (num_labels, hidden_layer_size + 1).
    """
    # First-layer parameters (input -> hidden), bias column included.
    size1 = (hidden_layer_size, input_layer_size + 1)
    theta1_ini = np.random.normal(0, .1, size=size1)

    # Second-layer parameters (hidden -> output), bias column included.
    size2 = (num_labels, hidden_layer_size + 1)
    theta2_ini = np.random.normal(0, .1, size=size2)

    # The excerpt shows no return, yet nnTrain unpacks two values from this
    # call, so both matrices are handed back in layer order.
    return theta1_ini, theta2_ini
NewerOlder