Created
December 18, 2015 05:50
-
-
Save vvksh/b3051d19f10df08adef4 to your computer and use it in GitHub Desktop.
Implementation of K-Nearest neighbor classification algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class KNN(Classifier): | |
def __init__(self, dataObj=None, headers=[], categories=None, K=None): | |
'''Take in a Data object with N points, a set of F headers, and a | |
matrix of categories, with one category label for each data point.''' | |
# call the parent init with the type | |
Classifier.__init__(self, 'KNN Classifier') | |
# store the headers used for classification | |
self.headers = headers | |
# number of classes and number of features | |
self.num_classes = 0 | |
self.num_features = 0 | |
# original class labels | |
self.class_labels = [] | |
# unique data for the KNN classifier: list of exemplars (matrices) | |
self.exemplars = [] | |
if dataObj != None: | |
A = dataObj.get_data(headers) | |
self.build(A, categories, K) | |
def build(self, A, categories, K=None): | |
'''Builds the classifier give the data points in A and the categories''' | |
# figure out how many categories there are and get the mapping (np.unique) | |
unique, mapping = np.unique(np.array(categories.T), return_inverse=True) | |
# print "unique","mapping", unique, mapping | |
self.num_classes = len(unique) | |
self.num_features = A.shape[1] | |
self.class_labels = unique | |
for i in range(self.num_classes): | |
exemplar = A[(mapping == i), :] | |
if K == None: | |
# append to exemplars a matrix with all of the rows of A where the category/mapping is i | |
# print exemplar.shape, "exemplar.shape", i, "ith exemplar" | |
self.exemplars.append(exemplar) | |
elif K != None and isinstance(K, int): # check if K is an integer | |
# W = vq.whiten(exemplar) | |
codebook = an.kmeans_init(exemplar, K) | |
(codebook, codes, errors) = an.kmeans_algorithm(exemplar, codebook) # run K-means on the rows of A where the category/mapping is i | |
# print codebook.shape, "codebook", "ith codebook", i, K, "cluster" | |
self.exemplars.append(codebook) # append the codebook to the exemplars | |
self.write("KNN_classifier.csv") | |
return | |
def classify(self, A, K=5, return_distances=False): | |
'''Classify each row of A into one category. Return a matrix of | |
category IDs in the range [0..C-1], and an array of class | |
labels using the original label values. If return_distances is | |
True, it also returns the NxC distance matrix. | |
The parameter K specifies how many neighbors to use in the | |
distance computation. The default is three.''' | |
# print A, "A", self.exemplars, "exemplars" | |
# error check to see if A has the same number of columns as the class means | |
if A.shape[1] != self.num_features: | |
print A.shape[1], self.num_features | |
print "check data dimensions" | |
return | |
# make a matrix that is N x C to store the distance to each class for each data point | |
D = np.matlib.zeros((A.shape[0], self.num_classes)) # a matrix of zeros that is N (rows of A) x C (number of classes) | |
for k in range(self.num_classes): | |
# make a temporary matrix that is N x M where M is the number of exemplars (rows in exemplars[i]) | |
temp = np.matrix(scipy.spatial.distance.cdist(A,self.exemplars[k],'euclidean')) | |
temp = np.sort(temp,axis =1) | |
# print temp, "temp sorted" | |
D[:,k] = np.sum(temp[:,:K], axis =1) # sum the first K columns # this is the distance to the first class | |
# calculate the most likely class for each data point | |
cats = np.argmin(D,axis=1) # take the argmin of D along axis 1 | |
# use the class ID as a lookup to generate the original labels | |
labels = self.class_labels[cats] | |
if return_distances: | |
return cats, labels, D | |
return cats, labels | |
def __str__(self): | |
'''Make a pretty string that prints out the classifier information.''' | |
s = "\nKNN Classifier\n" | |
for i in range(self.num_classes): | |
s += 'Class %d --------------------\n' % (i) | |
s += 'Number of Exemplars: %d\n' % (self.exemplars[i].shape[0]) | |
s += 'Mean of Exemplars :' + str(np.mean(self.exemplars[i], axis=0)) + "\n" | |
s += "\n" | |
return s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is an implementation of the KNN algorithm for data classification. Suppose you have two types of data (say, cats and dogs) and you have plotted them based on skin color and body weight. Given a new data point with a certain skin color and body weight, you are asked to classify it as a dog or a cat. Using this algorithm, you plot the new test point along with the known values, find, say, the 5 nearest values, and see whether there are more cats or more dogs among those 5 closest values. The above implementation also works for large data sets whose points have multiple features.
I wrote the implementation myself.