Skip to content

Instantly share code, notes, and snippets.

@foo0x29a
Last active June 16, 2018 21:32
Show Gist options
  • Save foo0x29a/c16882510afe8b943ca85aa43b068e86 to your computer and use it in GitHub Desktop.
Save foo0x29a/c16882510afe8b943ca85aa43b068e86 to your computer and use it in GitHub Desktop.
#! /usr/bin/python
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn import tree
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from scipy import interp
import sklearn
import numpy
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
def distance(train, test):
    """Return the Euclidean (L2) distance between two feature vectors.

    Args:
        train: Array-like of numbers (``numpy.ndarray``, list, tuple, ...).
        test: Array-like of numbers that broadcasts against ``train``.

    Returns:
        The norm of ``train - test`` as computed by ``numpy.linalg.norm``.
    """
    # Coerce both operands first: subtracting two plain Python sequences
    # (``list - list``) raises TypeError, while ndarrays pass through as-is.
    difference = numpy.asarray(train) - numpy.asarray(test)
    return numpy.linalg.norm(difference)
def _binary_vote(labels):
    """Return 0 when zeros strictly outnumber ones in ``labels``, else 1."""
    labels = list(labels)
    return 0 if labels.count(0) > labels.count(1) else 1


def _knn_predict(X_train, Y_train, X_test, k, metric):
    """Predict a 0/1 label for every row of ``X_test`` from its k nearest rows."""
    predictions = []
    for test in X_test:
        print("instancia: %s" % (test,))
        # Sorting (distance, label) pairs orders by distance first; equal
        # distances fall back to comparing the labels.
        ranked = sorted(zip([metric(train, test) for train in X_train], Y_train))
        if not ranked:
            raise ValueError("training set is empty")
        print("distancias ordenadas: %s" % (tuple(dist for dist, _ in ranked),))
        print("")
        predictions.append(_binary_vote(label for _, label in ranked[:k]))
    return predictions


def knn(k=3, train_path="3_train.csv", test_path="3_test.csv", metric=None):
    """Hand-written k-nearest-neighbours classifier for 0/1 labels.

    Both CSV files are read with ``pandas.read_csv``; the last column is
    treated as the label and every other column as a feature. The label
    column of the test file is ignored: this function only predicts.

    Progress is printed along the way (training data, each test instance,
    its sorted distances, and finally the list of predictions).

    Args:
        k: Number of nearest neighbours that vote. Must be >= 1.
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.
        metric: Callable ``metric(train_row, test_row)`` returning a
            sortable distance. Defaults to this module's ``distance``.

    Returns:
        A list with one prediction (0 or 1) per test row. A row is labelled
        0 only when zeros strictly outnumber ones among its k neighbours;
        ties and any other label value therefore yield 1.

    Raises:
        ValueError: If ``k`` is smaller than 1, or the training set is empty
            while there is at least one test row.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    if metric is None:
        # Resolved at call time so the module-level helper stays the default.
        metric = distance

    train_frame = pd.read_csv(train_path)
    Y_train = train_frame.iloc[:, -1].values
    X_train = train_frame.iloc[:, :-1].values
    X_test = pd.read_csv(test_path).iloc[:, :-1].values

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("X treino: %s" % (X_train,))
    print("Y treino: %s" % (Y_train,))

    res = _knn_predict(X_train, Y_train, X_test, k, metric)
    print(res)
    return res
def knn_(k=3, train_path="3_train.csv", test_path="3_test.csv"):
    """Classify the test CSV with scikit-learn's ``KNeighborsClassifier``.

    Library-based counterpart of ``knn``. Both CSV files are read with
    ``pandas.read_csv``; the last column is the label and every other
    column is a feature. The chosen ``k``, the predictions, the true test
    labels and the confusion matrix are printed.

    Args:
        k: Number of neighbours passed as ``n_neighbors``.
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.

    Returns:
        The array returned by ``KNeighborsClassifier.predict`` for the test
        features.
    """
    # The banner is derived from ``k`` so it cannot drift from n_neighbors.
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("k = %d" % k)
    neigh = KNeighborsClassifier(n_neighbors=k, metric="euclidean")

    train_frame = pd.read_csv(train_path)
    neigh.fit(train_frame.iloc[:, :-1], train_frame.iloc[:, -1])

    test_frame = pd.read_csv(test_path)
    Y_test = test_frame.iloc[:, -1].values
    prediction = neigh.predict(test_frame.iloc[:, :-1])

    print("predicted: %s" % (prediction,))
    print("true: %s" % (Y_test,))
    print(confusion_matrix(Y_test, prediction))
    return prediction
def decision_tree(train_path="3_train.csv", test_path="3_test.csv",
                  dot_path="3.out"):
    """Fit an entropy-based decision tree, export it, and score the test CSV.

    Both CSV files are read with ``pandas.read_csv``; the last column is
    the label and every other column is a feature. The fitted tree is
    written in Graphviz dot format to ``dot_path``. The predictions, the
    true test labels and the confusion matrix are printed.

    Args:
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.
        dot_path: File that receives the Graphviz dot description of the tree.

    Returns:
        The array returned by ``DecisionTreeClassifier.predict`` for the
        test features.
    """
    train_frame = pd.read_csv(train_path)
    Y_train = train_frame.iloc[:, -1]
    X_train = train_frame.iloc[:, :-1]

    test_frame = pd.read_csv(test_path)
    Y_test = test_frame.iloc[:, -1].values
    X_test = test_frame.iloc[:, :-1]

    clf = tree.DecisionTreeClassifier(criterion="entropy")
    clf = clf.fit(X_train, Y_train)

    # export_graphviz returns None whenever out_file is given, so the
    # graphviz.Source(...) object the code used to build from that return
    # value wrapped None and was never used. It was dropped, together with
    # the function-local ``import graphviz`` it required.
    # NOTE(review): the hard-coded names assume exactly six feature columns
    # in this order - confirm against the CSV header.
    tree.export_graphviz(clf, out_file=dot_path,
                         feature_names=["Idade", "H", "J", "R", "B", "C"],
                         class_names=["0", "1"],
                         filled=True, rounded=True,
                         special_characters=True)

    prediction = clf.predict(X_test)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("predicted: %s" % (prediction,))
    print("true: %s" % (Y_test,))
    print(confusion_matrix(Y_test, prediction))
    return prediction
if __name__ == '__main__':
    # Both candidate entry points are disabled; uncomment one to run it.
    # NOTE(review): knn_() is the only function not listed here - presumably
    # it was the call that used to be active; confirm before enabling it.
    #knn()
    #decision_tree()
    # An ``if`` body made only of comments is a syntax error, hence ``pass``.
    pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment