-
-
Save foo0x29a/c16882510afe8b943ca85aa43b068e86 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from itertools import cycle | |
from sklearn import tree | |
from sklearn import svm, datasets | |
from sklearn.metrics import roc_curve, auc | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import label_binarize | |
from sklearn.multiclass import OneVsRestClassifier | |
from scipy import interp | |
import sklearn | |
import numpy | |
import pandas as pd | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.metrics import confusion_matrix | |
def distance(train, test):
    """Return the norm of the difference between two feature vectors.

    ``numpy.linalg.norm`` is applied with its default arguments to
    ``train - test``; for 1-D inputs this is the Euclidean (L2) distance.
    """
    offset = train - test
    return numpy.linalg.norm(offset)
def knn(k=3):
    """Classify the test set with a hand-written k-nearest-neighbours vote.

    Reads ``3_train.csv`` and ``3_test.csv`` from the current working
    directory. In both files the last column is treated as the class label
    and every preceding column as a feature. For each test row the Euclidean
    distance to every training row is computed, the ``k`` closest training
    rows vote, and the prediction is 0 only when label 0 holds a strict
    majority over label 1 among them; otherwise it is 1 (the vote assumes
    binary 0/1 labels).

    The training data, each test instance, its sorted distances and finally
    the list of predictions are printed to stdout.

    Args:
        k: Number of neighbours that take part in the vote. Defaults to 3.

    Raises:
        ValueError: If ``k`` is smaller than 1 or the training file has no
            data rows.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    train_frame = pd.read_csv("3_train.csv")
    Y_train = train_frame.iloc[:, -1].values
    X_train = train_frame.iloc[:, :-1].values
    if len(X_train) == 0:
        raise ValueError("training set is empty")

    # Only the feature columns of the test file are needed here; the label
    # column is never compared against the predictions in this routine.
    X_test = pd.read_csv("3_test.csv").iloc[:, :-1].values

    res = []
    # Single-argument print calls with %-formatting emit the same text under
    # both Python 2 and Python 3.
    print("X treino: %s" % (X_train,))
    print("Y treino: %s" % (Y_train,))
    for test in X_test:
        print("instancia: %s" % (test,))
        # Row-wise L2 norm of (training row - test row) in one vectorised call.
        distances = numpy.linalg.norm(X_train - test, axis=1)
        # Sorting (distance, label) pairs means equal distances are ordered
        # by label, so ties favour the smaller label.
        ranked = sorted(zip(distances, Y_train))
        d = tuple(dist for dist, _ in ranked)
        print("distancias ordenadas: %s" % (d,))
        nearest = [label for _, label in ranked[:k]]
        res.append(0 if nearest.count(0) > nearest.count(1) else 1)
    print(res)
def knn_(k=3):
    """Classify the test set with scikit-learn's ``KNeighborsClassifier``.

    Reads ``3_train.csv`` and ``3_test.csv`` from the current working
    directory, treating the last column of each file as the class label and
    the preceding columns as features. A Euclidean-metric classifier is
    fitted on the training data; the predictions, the true test labels and
    their confusion matrix are printed to stdout.

    Args:
        k: Number of neighbours passed to the classifier. Defaults to 3.
    """
    # using the library implementation
    # Single-argument print calls with %-formatting emit the same text under
    # both Python 2 and Python 3.
    print("k = %d" % k)
    neigh = KNeighborsClassifier(n_neighbors=k, metric="euclidean")

    train_frame = pd.read_csv("3_train.csv")
    Y_train = train_frame.iloc[:, -1]
    X_train = train_frame.iloc[:, :-1]
    neigh.fit(X_train, Y_train)

    test_frame = pd.read_csv("3_test.csv")
    Y_test = test_frame.iloc[:, -1].values
    X_test = test_frame.iloc[:, :-1]

    prediction = neigh.predict(X_test)
    print("predicted: %s" % (prediction,))
    print("true: %s" % (Y_test,))
    print(confusion_matrix(Y_test, prediction))
def decision_tree():
    """Fit an entropy-based decision tree and evaluate it on the test set.

    Reads ``3_train.csv`` and ``3_test.csv`` from the current working
    directory, treating the last column of each file as the class label and
    the preceding columns as features. The fitted tree is exported in
    Graphviz DOT format to the file ``3.out``; the predictions, the true
    test labels and their confusion matrix are printed to stdout.
    """
    # using the library implementation
    train_frame = pd.read_csv("3_train.csv")
    Y_train = train_frame.iloc[:, -1]
    X_train = train_frame.iloc[:, :-1]

    test_frame = pd.read_csv("3_test.csv")
    Y_test = test_frame.iloc[:, -1].values
    X_test = test_frame.iloc[:, :-1]

    clf = tree.DecisionTreeClassifier(criterion="entropy")
    clf = clf.fit(X_train, Y_train)

    # export_graphviz only returns the DOT text when out_file is None; with a
    # file name it writes the file and returns None. The previous code wrapped
    # that None in an unused graphviz.Source, so that dead step (and its
    # graphviz import) is gone while 3.out is still produced.
    # NOTE(review): feature_names presumes exactly six feature columns in the
    # CSV files -- confirm against the data.
    tree.export_graphviz(clf, out_file="3.out",
                         feature_names=["Idade", "H", "J", "R", "B", "C"],
                         class_names=["0", "1"],
                         filled=True, rounded=True,
                         special_characters=True)

    prediction = clf.predict(X_test)
    # Single-argument print calls with %-formatting emit the same text under
    # both Python 2 and Python 3.
    print("predicted: %s" % (prediction,))
    print("true: %s" % (Y_test,))
    print(confusion_matrix(Y_test, prediction))
if __name__ == '__main__':
    # Uncomment the experiment to run.
    #knn()
    #decision_tree()
    # A body made only of comments is a syntax error, so keep an explicit
    # no-op while every experiment call is commented out.
    pass
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment