Skip to content

Instantly share code, notes, and snippets.

@grasses
Created April 15, 2017 09:24
Show Gist options
  • Save grasses/bd06ded5bb970108c91b51cfe4018d9c to your computer and use it in GitHub Desktop.
Save grasses/bd06ded5bb970108c91b51cfe4018d9c to your computer and use it in GitHub Desktop.
decision tree test, reading from csv
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pydotplus, csv
import numpy as np
from sklearn import tree, preprocessing
from sklearn.datasets import load_iris
from IPython.display import Image
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction import DictVectorizer
def _read_samples(path):
    """Read a CSV file with a header row into feature dicts and labels.

    The last column of each data row is taken as the class label; every
    other column is stored under the header name at the same position.

    Returns:
        A ``(features, labels)`` pair: a list of ``{header: value}`` dicts
        and a parallel list of label strings.

    Raises:
        ValueError: if the file has no header row.
        IndexError: if a data row has more feature columns than the header.
    """
    # The context manager closes the file even if parsing fails.
    with open(path, 'r') as fcsv:
        reader = csv.reader(fcsv)
        # next() builtin instead of reader.next(): works on Python 2 and 3.
        headers = next(reader, None)
        if headers is None:
            raise ValueError('CSV file %r is empty' % (path,))
        features = []
        labels = []
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they hold no sample.
                continue
            labels.append(row[-1])
            features.append(
                {headers[i]: value for i, value in enumerate(row[:-1])})
    return features, labels


def _export_iris_tree(pdf_path="iris.pdf"):
    """Fit a default decision tree on the iris data set and write it as PDF.

    Returns the IPython ``Image`` built from the PNG rendering of the tree.
    """
    iris = load_iris()
    clf = tree.DecisionTreeClassifier().fit(iris.data, iris.target)
    dot_data = tree.export_graphviz(
        clf,
        out_file=None, filled=True, rounded=True,
        class_names=iris.target_names,
        # The tree was trained on iris, so its nodes must be labelled with
        # the iris feature names, not those of an unrelated vectorizer.
        feature_names=iris.feature_names)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf(pdf_path)
    return Image(graph.create_png())


def from_csv(dir):
    """Train a decision tree on a CSV data set, then export an iris tree.

    Steps:
      1. Read ``dir`` (a CSV path; first row is the header, last column is
         the class label), one-hot encode the features with
         ``DictVectorizer`` and binarize the labels.
      2. Fit an entropy-based ``DecisionTreeClassifier`` and print its
         prediction for two rows: a hand-modified copy of the first sample
         and the unmodified second sample.
      3. Independently fit a tree on the iris data set and write it to
         ``iris.pdf`` (relative to the current working directory).

    Args:
        dir: path of the CSV file to read.

    Returns:
        None. The prediction is printed to stdout.

    Raises:
        ValueError: if the file has no header row or no data rows.
        IndexError: if the encoded feature matrix has fewer than 9 columns
            (the hard-coded probe below writes to column 8) or fewer than
            2 rows.
    """
    feature_list, label_list = _read_samples(dir)
    if not feature_list:
        raise ValueError('CSV file %r contains no data rows' % (dir,))

    # generate data to X, Y
    vec = DictVectorizer()
    X = vec.fit_transform(feature_list).toarray()
    # Sorted so the column order of Y (and of the printed prediction) is
    # reproducible; set iteration order is arbitrary. `classes` is passed
    # by keyword because newer scikit-learn releases no longer accept it
    # positionally.
    classes = sorted(set(label_list))
    Y = preprocessing.label_binarize(label_list, classes=classes)
    clf = tree.DecisionTreeClassifier(criterion='entropy')
    clf = clf.fit(X, Y)

    # predict decision tree
    # Copy first: X[0] is a view, and editing it would alter X itself.
    new_row = X[0].copy()
    # Hard-coded one-hot column indices of the probe sample.
    # NOTE(review): these presumably match the encoding of the Car
    # Evaluation data set used by the script entry point -- confirm before
    # using another CSV.
    new_row[5] = 0
    new_row[6] = 1
    new_row[8] = 1
    new_row[0] = 0
    new_row[3] = 1
    y = clf.predict([new_row, X[1]])
    print(y)

    # output pdf
    # The returned Image is ignored so this function keeps returning None.
    _export_iris_tree()
if __name__ == "__main__":
    # Script entry point. The sample input is the Car Evaluation data set:
    # https://archive.ics.uci.edu/ml/datasets/Car+Evaluation
    from_csv("data.tree.txt")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment