Last active
April 4, 2018 08:11
-
-
Save jerinisready/d98bf54c1cc5261b316385a3c604a9a6 to your computer and use it in GitHub Desktop.
How to train and test a DecisionTreeClassifier from sklearn on the Iris dataset in Python.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import load_iris | |
from sklearn import tree | |
import numpy as np | |
import csv | |
# | |
# NOTE: The commented-out code below runs the same experiment using the dataset bundled with sklearn.datasets.
# | |
# iris = load_iris() | |
# # print (iris.feature_names ) | |
# # print (iris.target_names) | |
# | |
# test_ids = [0, 61, 105,] | |
# train_target = np.delete(iris.target, test_ids) | |
# train_data = np.delete(iris.data, test_ids, axis=0) | |
# | |
# test_target = iris.target[test_ids] | |
# test_data = iris.data[test_ids] | |
# | |
# clsf = tree.DecisionTreeClassifier() | |
# clsf.fit(train_data, train_target) | |
# | |
# print("Expected Results : ") | |
# for i in test_target: | |
# print("%s " % (i)) | |
# | |
# print("Rendered Results : ", ) | |
# for i in clsf.predict(test_data): | |
# print("%s " % (i)) | |
feature_names = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
# The commas matter: adjacent string literals are implicitly concatenated,
# which would collapse this into a single-element list.
target_names = ['setosa', 'versicolor', 'virginica']

# Location of the raw CSV data (four numeric columns followed by a label).
IRIS_DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

# Row indices held out of the training set and used afterwards to check the
# predictions of the trained classifier.
test_ids = [48, 53, 102, 143]


def parse_iris_rows(lines):
    """Parse CSV lines into feature rows and labels.

    Args:
        lines: iterable of text lines, each of the form
            ``f1,f2,...,fn,label``.

    Returns:
        A ``(data, target)`` tuple. ``data`` is a list of lists of floats
        (every column except the last); ``target`` is the list of labels
        (the last column), in the same order.

    Raises:
        ValueError: if a feature column cannot be converted to ``float``.
    """
    data = []
    target = []
    for row in csv.reader(lines):
        # csv.reader yields [] for blank lines (the downloaded file ends with
        # some); indexing such a row would raise IndexError.
        if not row:
            continue
        # Everything but the last column is a feature. Slicing with [:-2]
        # would silently drop the last feature as well.
        data.append([float(value) for value in row[:-1]])
        target.append(row[-1])
    return data, target


def load_iris_rows(url=IRIS_DATA_URL):
    """Download the CSV at *url* and return ``(data, target)``.

    The built-in ``open()`` only handles local paths, so the file is fetched
    with ``urllib.request.urlopen`` instead.
    """
    import urllib.request  # local import: only needed when actually downloading

    with urllib.request.urlopen(url) as response:
        text = response.read().decode('utf-8')
    return parse_iris_rows(text.splitlines())


def split_train_test(data, target, held_out_ids):
    """Split *data*/*target* into training and test parts.

    Rows whose index is in *held_out_ids* are removed from the training set
    and returned separately as the test set.

    Returns:
        ``(train_data, train_target, test_data, test_target)`` where the
        training parts are numpy arrays and the test parts are plain lists.
    """
    train_data = np.delete(np.asarray(data, dtype=float), held_out_ids, axis=0)
    train_target = np.delete(np.asarray(target), held_out_ids)
    test_data = [data[i] for i in held_out_ids]
    test_target = [target[i] for i in held_out_ids]
    return train_data, train_target, test_data, test_target


def main():
    """Train a decision tree on the iris data and print expected vs. predicted labels."""
    data, target = load_iris_rows()
    train_data, train_target, test_data, test_target = split_train_test(
        data, target, test_ids
    )

    # Create the decision tree and train it on the remaining rows.
    clsf = tree.DecisionTreeClassifier()
    clsf.fit(train_data, train_target)

    print("Expected Results : ")
    for label in test_target:
        print("%s " % (label))

    # Predict the labels of the held-out rows.
    predicted_output = clsf.predict(test_data)
    print("Rendered Results : ", )
    for label in predicted_output:
        print("%s " % (label))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment