amueller · January 15, 2013 19:10
diff --git a/msrc_svm.py b/msrc_svm.py
 import os                                                                                                                              
 from glob import glob                                                                                                                  
                                                                                                                                       
 import numpy as np                                                                                                                     
 import matplotlib.pyplot as plt                                                                                                        
                                                                                                                                       
 from sklearn.svm import LinearSVC                                                                                                      
 from sklearn.metrics import accuracy_score, confusion_matrix                                                                           
                                                                                                                                       
 # Names of the object classes.  load_data() compares the integer labels it
 # reads against positions in this array, so the order of entries matters.
 classes = np.array([
     'building',
     'grass',
     'tree',
     'cow',
     'sheep',
     'sky',
     'aeroplane',
     'water',
     'face',
     'car',
     'bicycle',
     'flower',
     'sign',
     'bird',
     'book',
     'chair',
     'road',
     'cat',
     'dog',
     'body',
     'boat',
     'void',
     'mountain',
     'horse',
 ])
                                                                                                                                       
                                                                                                                                       
 def load_data(dataset="train"):
     """Load features and labels for one split of the dataset.

     For every ``*.dat`` file found in the split's directory, labels are read
     from ``labels/<name>.txt`` and features from the six files
     ``<split dir>/<name>.local1`` ... ``<name>.local6``, which are stacked
     horizontally.  All paths are relative to the current working directory.
     Samples labelled "mountain", "horse" or "void" are dropped.

     Parameters
     ----------
     dataset : str
         Which split to load: 'train', 'val' or 'test' (read from the
         directories "Train", "Validation" and "Test" respectively).

     Returns
     -------
     features : ndarray
         Feature rows of the samples that were kept.
     labels : ndarray of int
         Labels of the samples that were kept; values are positions in the
         module-level ``classes`` array.

     Raises
     ------
     ValueError
         If ``dataset`` is not one of the known split names, or if the
         split directory contains no ``*.dat`` files.
     """
     # positions of the classes to discard; each is a length-1 index array
     mountain_idx = np.where(classes == "mountain")[0]
     horse_idx = np.where(classes == "horse")[0]
     void_idx = np.where(classes == "void")[0]

     # map the short split name onto its directory
     ds_dict = dict(train="Train", val="Validation", test="Test")
     if dataset not in ds_dict:
         raise ValueError("dataset must be one of 'train', 'val', 'test',"
                          " got %s" % dataset)

     ds_path = ds_dict[dataset]
     features = []
     labels = []
     # glob() returns files in arbitrary order; sort so that the order of
     # the returned samples is reproducible.
     for f in sorted(glob(ds_path + "/*.dat")):
         # the part of the file name before the first dot identifies the
         # image and is shared by its label and feature files
         name = os.path.basename(f).split('.')[0]
         # builtin int: the np.int alias was removed in NumPy 1.24
         labels.append(np.loadtxt("labels/%s.txt" % name, dtype=int))
         feat = [np.loadtxt("%s/%s.local%s" % (ds_path, name, i))
                 for i in range(1, 7)]
         features.append(np.hstack(feat))

     if not features:
         # fail with a clear message instead of np.vstack's generic one
         raise ValueError("no .dat files found in %r" % ds_path)

     features = np.vstack(features)
     labels = np.hstack(labels)

     # remove mountain, horse and void; the mask is built once and applied
     # to both arrays so they stay aligned
     keep = ((labels != mountain_idx) & (labels != void_idx)
             & (labels != horse_idx))
     return features[keep], labels[keep]
                                                                                                                                       
                                                                                                                                       
 def main():
     """Train a linear SVM on train+validation data and evaluate it on test.

     Prints the overall test accuracy and the mean of the diagonal of the
     row-normalized confusion matrix, and displays both the raw and the
     normalized confusion matrix as images.
     """
     X_train, y_train = load_data()
     X_val, y_val = load_data("val")
     # put train and val together
     X = np.vstack([X_train, X_val])
     y = np.hstack([y_train, y_val])
     X_test, y_test = load_data("test")

     # fit a linear SVM
     clf = LinearSVC(C=0.0001)
     clf.fit(X, y)
     # predict
     y_pred = clf.predict(X_test)

     # evaluate using accuracy and mean accuracy
     # (via diagonal of confusion matrix)
     print(accuracy_score(y_test, y_pred))
     confusion = confusion_matrix(y_test, y_pred)
     plt.matshow(confusion)
     # Divide every row by its sum.  The builtin float is used because the
     # np.float alias was removed in NumPy 1.24.
     # NOTE(review): a row that sums to zero would produce NaN here and in
     # the printed mean -- confirm this cannot happen for the test split.
     confusion_normalized = (confusion.astype(float) /
                             confusion.sum(axis=1)[:, np.newaxis])
     print(np.mean(np.diag(confusion_normalized)))
     plt.matshow(confusion_normalized)
     plt.show()
                                                                                                                                       
                                                                                                                                       
 # Script entry point: run the experiment only when this file is executed
 # directly, not when it is imported.
 if __name__ == "__main__":
    main()
	import os
	from glob import glob

	import numpy as np
	import matplotlib.pyplot as plt

	from sklearn.svm import LinearSVC
	from sklearn.metrics import accuracy_score, confusion_matrix

	# Names of the object classes.  load_data() compares the integer labels it
	# reads against positions in this array, so the order of entries matters.
	classes = np.array([
	    'building',
	    'grass',
	    'tree',
	    'cow',
	    'sheep',
	    'sky',
	    'aeroplane',
	    'water',
	    'face',
	    'car',
	    'bicycle',
	    'flower',
	    'sign',
	    'bird',
	    'book',
	    'chair',
	    'road',
	    'cat',
	    'dog',
	    'body',
	    'boat',
	    'void',
	    'mountain',
	    'horse',
	])


	def load_data(dataset="train"):
	    """Load features and labels for one split of the dataset.

	    For every ``*.dat`` file found in the split's directory, labels are
	    read from ``labels/<name>.txt`` and features from the six files
	    ``<split dir>/<name>.local1`` ... ``<name>.local6``, which are stacked
	    horizontally.  All paths are relative to the current working
	    directory.  Samples labelled "mountain", "horse" or "void" are dropped.

	    Parameters
	    ----------
	    dataset : str
	        Which split to load: 'train', 'val' or 'test' (read from the
	        directories "Train", "Validation" and "Test" respectively).

	    Returns
	    -------
	    features : ndarray
	        Feature rows of the samples that were kept.
	    labels : ndarray of int
	        Labels of the samples that were kept; values are positions in the
	        module-level ``classes`` array.

	    Raises
	    ------
	    ValueError
	        If ``dataset`` is not one of the known split names, or if the
	        split directory contains no ``*.dat`` files.
	    """
	    # positions of the classes to discard; each is a length-1 index array
	    mountain_idx = np.where(classes == "mountain")[0]
	    horse_idx = np.where(classes == "horse")[0]
	    void_idx = np.where(classes == "void")[0]

	    # map the short split name onto its directory
	    ds_dict = dict(train="Train", val="Validation", test="Test")
	    if dataset not in ds_dict:
	        raise ValueError("dataset must be one of 'train', 'val', 'test',"
	                         " got %s" % dataset)

	    ds_path = ds_dict[dataset]
	    features = []
	    labels = []
	    # glob() returns files in arbitrary order; sort so that the order of
	    # the returned samples is reproducible.
	    for f in sorted(glob(ds_path + "/*.dat")):
	        # the part of the file name before the first dot identifies the
	        # image and is shared by its label and feature files
	        name = os.path.basename(f).split('.')[0]
	        # builtin int: the np.int alias was removed in NumPy 1.24
	        labels.append(np.loadtxt("labels/%s.txt" % name, dtype=int))
	        feat = [np.loadtxt("%s/%s.local%s" % (ds_path, name, i))
	                for i in range(1, 7)]
	        features.append(np.hstack(feat))

	    if not features:
	        # fail with a clear message instead of np.vstack's generic one
	        raise ValueError("no .dat files found in %r" % ds_path)

	    features = np.vstack(features)
	    labels = np.hstack(labels)

	    # remove mountain, horse and void; the mask is built once and applied
	    # to both arrays so they stay aligned
	    keep = ((labels != mountain_idx) & (labels != void_idx)
	            & (labels != horse_idx))
	    return features[keep], labels[keep]


	def main():
	    """Train a linear SVM on train+validation data and evaluate it on test.

	    Prints the overall test accuracy and the mean of the diagonal of the
	    row-normalized confusion matrix, and displays both the raw and the
	    normalized confusion matrix as images.
	    """
	    X_train, y_train = load_data()
	    X_val, y_val = load_data("val")
	    # put train and val together
	    X = np.vstack([X_train, X_val])
	    y = np.hstack([y_train, y_val])
	    X_test, y_test = load_data("test")

	    # fit a linear SVM
	    clf = LinearSVC(C=0.0001)
	    clf.fit(X, y)
	    # predict
	    y_pred = clf.predict(X_test)

	    # evaluate using accuracy and mean accuracy
	    # (via diagonal of confusion matrix)
	    print(accuracy_score(y_test, y_pred))
	    confusion = confusion_matrix(y_test, y_pred)
	    plt.matshow(confusion)
	    # Divide every row by its sum.  The builtin float is used because the
	    # np.float alias was removed in NumPy 1.24.
	    # NOTE(review): a row that sums to zero would produce NaN here and in
	    # the printed mean -- confirm this cannot happen for the test split.
	    confusion_normalized = (confusion.astype(float) /
	                            confusion.sum(axis=1)[:, np.newaxis])
	    print(np.mean(np.diag(confusion_normalized)))
	    plt.matshow(confusion_normalized)
	    plt.show()


	# Script entry point: run the experiment only when this file is executed
	# directly, not when it is imported.
	if __name__ == "__main__":
	    main()