Created
November 10, 2013 15:25
-
-
Save andreasvc/7399482 to your computer and use it in GitHub Desktop.
Classify rows from CSV files with SVM with leave-one-out cross-validation; labels taken from first column, of the form 'label_description'.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Classify rows from CSV files with SVM with leave-one-out cross-validation; | |
labels taken from first column, of the form 'label_description'. """ | |
import sys | |
import pandas | |
from sklearn import svm, cross_validation, preprocessing | |
data = pandas.read_csv(sys.argv[1]) | |
xdata = data.as_matrix(data.columns[1:]) | |
#xdata = preprocessing.scale(xdata) # normalize data => mean of 0, stddev of 1 | |
ylabels = [a.split('_')[0] for a in data.icol(0)] | |
ytarget = preprocessing.LabelEncoder().fit(ylabels).transform(ylabels) | |
linearsvc = svm.SVC(kernel='linear') | |
scores = pandas.Series(index=data.icol(0)) | |
for train, test in cross_validation.LeaveOneOut(n=len(ytarget)): | |
classifier = linearsvc.fit(xdata[train], ytarget[train]) | |
scores.iloc[test[0]] = classifier.score(xdata[test], ytarget[test]) | |
print 'Scores:\n', scores | |
print 'Accuracy: %0.2f %%' % (100 * scores.mean(), ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment