Skip to content

Instantly share code, notes, and snippets.

@jerinisready
Last active April 4, 2018 08:11
Show Gist options
  • Save jerinisready/d98bf54c1cc5261b316385a3c604a9a6 to your computer and use it in GitHub Desktop.
Save jerinisready/d98bf54c1cc5261b316385a3c604a9a6 to your computer and use it in GitHub Desktop.
How to train and test a classifier on the Iris dataset in Python using DecisionTreeClassifier from sklearn.
from sklearn.datasets import load_iris
from sklearn import tree
import numpy as np
import csv
#
# NOTE: The commented-out code below runs the same experiment using the dataset bundled with sklearn.datasets.
#
# iris = load_iris()
# # print (iris.feature_names )
# # print (iris.target_names)
#
# test_ids = [0, 61, 105,]
# train_target = np.delete(iris.target, test_ids)
# train_data = np.delete(iris.data, test_ids, axis=0)
#
# test_target = iris.target[test_ids]
# test_data = iris.data[test_ids]
#
# clsf = tree.DecisionTreeClassifier()
# clsf.fit(train_data, train_target)
#
# print("Expected Results : ")
# for i in test_target:
# print("%s " % (i))
#
# print("Rendered Results : ", )
# for i in clsf.predict(test_data):
# print("%s " % (i))
# Column names of the four numeric measurements, in file order.
feature_names = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
# The commas matter: without them Python concatenates adjacent string
# literals into the single element 'setosaversicolorvirginica'.
target_names = ['setosa', 'versicolor', 'virginica']

# Location of the raw comma-separated dataset (no header row; the last
# column of every record is the class label).
IRIS_DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

# Row indices held out of training and used to check the predictions.
test_ids = [48, 53, 102, 143]


def parse_iris_rows(lines):
    """Parse CSV text lines into a list of rows, skipping empty rows.

    Args:
        lines: Any iterable of strings accepted by ``csv.reader`` (an open
            text file, a list of lines, ...).

    Returns:
        A list of rows, each row being a list of string fields. Blank lines
        (which ``csv.reader`` yields as ``[]``) are dropped so that later
        ``row[-1]`` lookups cannot raise ``IndexError``.
    """
    return [row for row in csv.reader(lines) if row]


def load_iris_rows(source=IRIS_DATA_URL):
    """Load the dataset rows from an HTTP(S) URL or from a local file path.

    Args:
        source: URL starting with ``http://`` / ``https://``, or a path to a
            local copy of the data file.

    Returns:
        The parsed, non-empty rows (see ``parse_iris_rows``).

    Raises:
        urllib.error.URLError: If the URL cannot be fetched.
        OSError: If the local file cannot be opened.
    """
    if source.startswith(('http://', 'https://')):
        # The builtin open() only understands filesystem paths, so remote
        # sources have to be fetched explicitly. Imported locally because
        # this is the only place that needs it.
        from urllib.request import urlopen

        with urlopen(source) as response:
            text = response.read().decode('utf-8')
        return parse_iris_rows(text.splitlines())

    # newline='' is what the csv module asks for when reading from a file.
    with open(source, 'r', newline='') as myfile:
        return parse_iris_rows(myfile)


def split_features_target(dataset):
    """Split parsed rows into independent (features) and dependent (label) data.

    Args:
        dataset: List of rows whose last field is the class label and whose
            remaining fields are numeric strings.

    Returns:
        A ``(data, target)`` tuple: ``data`` is a list of lists of floats
        (every column except the last), ``target`` is the list of labels.

    Raises:
        ValueError: If a feature field cannot be converted to ``float``.
    """
    # row[:-1] keeps *all* feature columns; only the label is excluded.
    data = [[float(value) for value in row[:-1]] for row in dataset]
    target = [row[-1] for row in dataset]
    return data, target


def split_train_test(data, target, held_out_ids):
    """Separate the held-out samples from the training samples.

    Args:
        data: Feature rows, as returned by ``split_features_target``.
        target: Labels aligned with ``data``.
        held_out_ids: Row indices to remove from the training set and use
            as the test set.

    Returns:
        ``(train_data, train_target, test_data, test_target)``. The training
        parts are NumPy arrays (result of ``np.delete``); the test parts are
        plain lists in the order given by ``held_out_ids``.
    """
    # Removing the test rows from the training set ensures the classifier
    # is evaluated on samples it has not seen.
    train_target = np.delete(target, held_out_ids)
    train_data = np.delete(data, held_out_ids, axis=0)
    test_target = [target[i] for i in held_out_ids]
    test_data = [data[i] for i in held_out_ids]
    return train_data, train_target, test_data, test_target


def main():
    """Train a decision tree on the dataset and print expected vs. predicted labels."""
    dataset = load_iris_rows()
    data, target = split_features_target(dataset)
    train_data, train_target, test_data, test_target = split_train_test(
        data, target, test_ids
    )

    # Create the decision tree and fit it to the training samples.
    clsf = tree.DecisionTreeClassifier()
    clsf.fit(train_data, train_target)

    print("Expected Results : ")
    for i in test_target:
        print("%s " % (i))

    # Predict the labels of the held-out samples.
    predicted_output = clsf.predict(test_data)
    print("Rendered Results : ", )
    for i in predicted_output:
        print("%s " % (i))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment