Last active
August 29, 2015 14:21
-
-
Save moriarty/acd6971d89429ba60b12 to your computer and use it in GitHub Desktop.
Learning and Adaptivity In Class Assignment READ DATA
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| """ | |
| @author: Alex Moriarty | |
| """ | |
| import numpy as np | |
| import sklearn | |
| from sklearn import tree | |
| from sklearn.externals.six import StringIO | |
| import pydot | |
| import matplotlib.pyplot as plt | |
| # hint: Python 1d arrays, vs row and column vectors | |
| # A = np.arange(10) | |
| # print A.shape | |
| # print A[:, np.newaxis] | |
| # print A[np.newaxis, :] | |
class SimplePredictionQuestion(object):
    """Prepare the zoo data set and score a classifier on slices of it.

    The constructor loads ``data/zoo/zoo.data`` and builds one
    ``data_*`` / ``labels_*`` pair for each part (a-d) of the exercise.
    ``question_one`` is the exercise stub; ``do`` expects it to leave a
    fitted estimator on ``self.classifier``.
    """

    def __init__(self):
        """Load and prepare the data as soon as the object is created."""
        self.read_prepare_data()

    def read_prepare_data(self):
        """Read the zoo file and build the per-question data/label arrays.

        Sets ``self.le`` (the fitted label encoder), ``self.all_zoo_data``
        (encoded first column followed by the numeric columns) and the
        ``data_a`` .. ``data_d`` / ``labels_a`` .. ``labels_d`` slices.
        """
        # Imported explicitly: ``import sklearn`` on its own does not
        # guarantee that the ``preprocessing`` submodule has been loaded.
        from sklearn.preprocessing import LabelEncoder

        # Numeric pass. The first column holds strings and cannot be read
        # as a number, so it is dropped from this array.
        zoo_data = np.genfromtxt('data/zoo/zoo.data', delimiter=',')
        zoo_data = np.delete(zoo_data, 0, 1)

        # The first column of data is of type string, and wasn't read
        # properly above; read again with dtype=None to recover it.
        zoo_data_raw = np.genfromtxt('data/zoo/zoo.data', delimiter=',',
                                     dtype=None)
        zoo_names = np.array([record[0] for record in zoo_data_raw])

        # Now encode the strings as integers and make them a column vector
        # so they can be stacked in front of the numeric columns.
        self.le = LabelEncoder()
        self.le.fit(zoo_names)
        encoded_names = self.le.transform(zoo_names)[:, np.newaxis]
        self.all_zoo_data = np.hstack((encoded_names, zoo_data))

        # Prepare the data sets and labels required for each part of the
        # question. Column 0 is the encoded first column of the file.
        # NOTE(review): parts a and b stop at column 15 (slice end 16)
        # while part d uses columns 1..16 -- confirm that leaving column 16
        # out of a/b is intended and not an off-by-one.
        self.data_a = self.all_zoo_data[:, 1:16]
        self.labels_a = self.all_zoo_data[:, 17]
        self.data_b = self.all_zoo_data[:, 0:16]
        self.labels_b = self.all_zoo_data[:, 17]
        # Part c predicts column 5 from everything except columns 0 and 5.
        self.data_c = np.delete(self.all_zoo_data, [0, 5], 1)
        self.labels_c = self.all_zoo_data[:, 5]
        # Part d predicts the encoded first column from columns 1..16.
        self.data_d = self.all_zoo_data[:, 1:17]
        self.labels_d = self.all_zoo_data[:, 0]

    def question_one(self, data, labels, question_part):
        """Exercise stub, intentionally left empty.

        ``do`` calls this with the training slice and afterwards reads
        ``self.classifier``, so a solution has to assign a fitted estimator
        to that attribute.

        Args:
            data: Training samples.
            labels: Training labels, one per row of ``data``.
            question_part: Name of the exercise part (``do`` passes "a").
        """
        # WRITE 3 LINES HERE
        pass

    @staticmethod
    def _svg_filename(filename):
        """Return ``filename`` carrying exactly one ``.svg`` extension."""
        if filename.endswith(".svg"):
            return filename
        return filename + ".svg"

    def draw_graph(self, classifier, filename="out.svg"):
        """Export ``classifier`` with graphviz and write it as an SVG file.

        Args:
            classifier: Fitted tree passed to ``tree.export_graphviz``.
            filename: Output path. ``.svg`` is appended only when missing,
                so the default no longer produces ``out.svg.svg``.
        """
        dot_data = StringIO()
        tree.export_graphviz(classifier, out_file=dot_data)
        graph = pydot.graph_from_dot_data(dot_data.getvalue())
        # Newer pydot releases return a list of graphs instead of a single
        # graph object; accept both forms.
        if isinstance(graph, (list, tuple)):
            graph = graph[0]
        graph.write_svg(self._svg_filename(filename))

    def do(self, split=None):
        """Run part a and return the fraction of correct predictions.

        Args:
            split: Percentage of rows (taken from the top) handed to
                ``question_one`` for training; the remaining rows are used
                for scoring. With ``None`` every row is used for both.

        Returns:
            Number of matching predictions divided by the number of scored
            rows, as a float.

        Note:
            A split that leaves no rows to score (e.g. 100) makes the final
            division fail, because the result array is then empty.
        """
        if split is not None:
            split_at = int(self.all_zoo_data.shape[0]*split/100)
        else:
            # Slicing with None keeps every row on both sides.
            split_at = None
        self.question_one(self.data_a[:split_at, :],
                          self.labels_a[:split_at], "a")
        predicted = self.classifier.predict(self.data_a[split_at:])
        result = (predicted == self.labels_a[split_at:])
        # Parts b, c and d can be evaluated the same way by passing the
        # matching data_* / labels_* pair to question_one (part d is messy).
        return 1.0 * np.where(result)[0].size / result.size
def main(num_trials=1000, split=35):
    """Repeat the part-a experiment and summarise the accuracies.

    Prints min / mean / median / max and the distinct accuracy values, then
    shows a horizontal box plot of all trial results.

    Args:
        num_trials: Number of times ``do`` is called; at most 10000.
        split: Percentage of rows used for training. Must lie strictly
            between 0 and 100 so that both the training and the held-out
            slice are non-empty (100 would make ``do`` divide by zero).

    Raises:
        ValueError: If ``num_trials`` or ``split`` is out of range.
    """
    # Validate before loading any data; explicit raises are used instead of
    # ``assert`` so the checks survive ``python -O``.
    if num_trials > 10000:
        raise ValueError("Whoa! that's over 9000")
    if not 0 < split < 100:
        raise ValueError("The split must be between 0 and 100 (exclusive)")

    exercise_six = SimplePredictionQuestion()
    result = np.zeros(num_trials)
    for trial in range(num_trials):
        result[trial] = exercise_six.do(split)

    # Single-argument print calls behave the same on Python 2 and 3.
    print("min: \t\t%s" % np.amin(result))
    print("mean: \t\t%s" % np.mean(result))
    print("median: \t%s" % np.median(result))
    print("max: \t\t%s" % np.amax(result))
    print(np.unique(result))

    plt.figure(1)
    plt.subplot(111)
    plt.boxplot(result, vert=False)
    plt.show()
# Run the experiment only when this file is executed as a script. The guard
# must compare the module's __name__ variable, not the string "__name__";
# the string comparison is always False and made main() run on every import.
if __name__ == "__main__":
    main()
Author
solution for trouble with pyparsing
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
import numpy as np
import sklearn
from sklearn import tree
from sklearn.externals.six import StringIO
import pydot
import matplotlib.pyplot as plt