Skip to content

Instantly share code, notes, and snippets.

@andreasvc
Created November 10, 2013 15:25
Show Gist options
  • Save andreasvc/7399482 to your computer and use it in GitHub Desktop.
Save andreasvc/7399482 to your computer and use it in GitHub Desktop.
Classify rows from CSV files with SVM with leave-one-out cross-validation; labels taken from first column, of the form 'label_description'.
""" Classify rows from CSV files with SVM with leave-one-out cross-validation;
labels taken from first column, of the form 'label_description'. """
import sys
import pandas
from sklearn import svm, cross_validation, preprocessing
# Usage: python <script> data.csv
# The first CSV column holds row names of the form 'label_description';
# every remaining column is used as a feature.
if len(sys.argv) < 2:
    sys.exit('usage: %s <data.csv>' % sys.argv[0])

data = pandas.read_csv(sys.argv[1])
names = data.iloc[:, 0]

# Feature matrix: all columns except the first, as a plain array.
xdata = data[data.columns[1:]].values
#xdata = preprocessing.scale(xdata) # normalize data => mean of 0, stddev of 1

# The class label is the part of the row name before the first underscore;
# LabelEncoder maps the distinct labels onto integers for the classifier.
ylabels = [a.split('_')[0] for a in names]
ytarget = preprocessing.LabelEncoder().fit(ylabels).transform(ylabels)

linearsvc = svm.SVC(kernel='linear')

# One score per row, indexed by the row name. dtype is given explicitly so
# the result is a float column regardless of the pandas default for an
# empty Series.
scores = pandas.Series(index=names, dtype=float)

# Leave-one-out: each iteration trains on all rows but one and scores the
# single held-out row, so every score is the accuracy on one sample.
# NOTE(review): sklearn.cross_validation is the legacy module imported by
# this file; newer scikit-learn releases provide LeaveOneOut in
# sklearn.model_selection with a different call signature -- confirm
# against the installed version.
for train, test in cross_validation.LeaveOneOut(n=len(ytarget)):
    classifier = linearsvc.fit(xdata[train], ytarget[train])
    # Assign by position: row names are not guaranteed to be unique.
    scores.iloc[test[0]] = classifier.score(xdata[test], ytarget[test])

# Single-argument parenthesised print gives the same output under the
# Python 2 print statement and the Python 3 print function.
print('Scores:\n%s' % (scores, ))
print('Accuracy: %0.2f %%' % (100 * scores.mean(), ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment