Created
June 27, 2019 22:40
-
-
Save grantmwilliams/4da13a2f6ac85f9c5e70571a07c43ded to your computer and use it in GitHub Desktop.
Code from Answer to this Quora question: https://www.quora.com/unanswered/What-should-I-do-to-fetch-a-dataset-on-decision-tree-on-Python-or-is-it-impossible-to-do-a-concept-and-I-have-to-convert-the-data-to-numerical-numbers?__filter__=&__nsrc__=2&__snid3__=4694627430
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" This file uses a decision tree to classify the input data | |
and then loads the appropriate auxiliary dataset from the classification result | |
Uses SKlearn's decision tree implementation and as an example the | |
SKlearn iris dataset | |
""" | |
import os
import sys

import numpy as np  # only used to create our example datasets.
import pandas as pd  # only used to create our example datasets.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
def create_example_datasets(iris):
    """Split the iris data by class and write one CSV file per flower species.

    Parameters
    ----------
    iris : dict-like bunch with keys "data" (n_samples x n_features array),
        "target" (integer class labels 0/1/2) and "feature_names".

    Side effects: writes datasets/setosa.csv, datasets/versicolor.csv and
    datasets/virginica.csv.
    """
    data = iris["data"]
    categories = iris["target"]
    # Fix: ensure the output directory exists; otherwise to_csv raises
    # FileNotFoundError on a fresh checkout with no "datasets" folder.
    os.makedirs("datasets", exist_ok=True)
    # create dataframes of only the rows that have a specific flower and give the columns names
    iris_setosa = pd.DataFrame(data=data[categories == 0, :], columns=iris["feature_names"])
    iris_versicolor = pd.DataFrame(data=data[categories == 1, :], columns=iris["feature_names"])
    iris_virginica = pd.DataFrame(data=data[categories == 2, :], columns=iris["feature_names"])
    # write the flower specific dataframes to csv files (the index column is
    # written too; load_dataset strips the resulting "Unnamed" column on read)
    iris_setosa.to_csv("datasets/setosa.csv")
    iris_versicolor.to_csv("datasets/versicolor.csv")
    iris_virginica.to_csv("datasets/virginica.csv")
def load_dataset(prediction):
    """Return the auxiliary DataFrame that matches a predicted class name.

    prediction: one of "setosa", "versicolor" or "virginica".
    Returns the corresponding CSV loaded as a DataFrame, with any
    index-derived ("Unnamed") columns removed.
    """
    # Classification label -> CSV file name under the datasets directory.
    csv_for_label = {
        "virginica": "virginica.csv",
        "setosa": "setosa.csv",
        "versicolor": "versicolor.csv"
    }
    # Placeholder loading logic: as an example we read one of the
    # per-species CSVs produced from the subset of the iris dataset.
    frame = pd.read_csv("datasets" + "/" + csv_for_label[prediction])
    # The CSVs were written with the DataFrame index, so strip the
    # "Unnamed" column that read_csv recreates from it.
    return frame.loc[:, ~frame.columns.str.contains("^Unnamed")]
# load the dataset we are going to use in the decision tree
iris = load_iris()

# uses our iris dataset to create the datasets we want to load after our
# decision tree is evaluated
create_example_datasets(iris)

X = iris["data"]
y = iris["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y)

model = DecisionTreeClassifier()
model.fit(X_train, y_train)
print("Model Accuracy: {}%".format(model.score(X_test, y_test) * 100))

y_predict = model.predict(X_test)
# use the dataset to map numeric predictions back to flower names
predicted_names = [iris["target_names"][i] for i in y_predict]

# as an example we load the dataset for the first predicted name
print("Loading Dataset for {}".format(predicted_names[0]))
output_dataset = load_dataset(predicted_names[0])

# temporarily prints all columns on the same line
with pd.option_context("expand_frame_repr", False):
    # fix: the original line was missing its closing parenthesis (SyntaxError)
    print(output_dataset.head(5))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment