Skip to content

Instantly share code, notes, and snippets.

@vene
Created May 27, 2012 19:21
Show Gist options
  • Save vene/2815589 to your computer and use it in GitHub Desktop.
Save vene/2815589 to your computer and use it in GitHub Desktop.
Support vector regression on Anscombe's third dataset
import os
import numpy as np
import matplotlib.pylab as pl
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
# Dataset no. 3 from Anscombe's quartet, stored as a single-feature column X
# and a target vector y. The outlier has been moved to the first position so
# that it can be dropped with a plain ``[1:]`` slice.
#
# This toy dataset illustrates the effect of outliers and assumptions when
# analyzing data using descriptive statistics; the script shows the effect of
# the SVR tube width when fitting a regression line to it.
X = np.array([[13.], [10.], [8.], [9.], [11.], [14.],
              [6.], [4.], [12.], [7.], [5.]])
y = np.array([12.74, 7.46, 6.77, 7.11, 7.81, 8.84,
              6.08, 5.39, 8.15, 6.42, 5.73])

# Value handed to ImageMagick's ``-delay`` option, which counts ticks of
# 1/100 s by default (so 10 means a tenth of a second per frame).
DELAY = 10
# Directory that receives the individual PNG frames.
IMG_DIR = 'imgs_svr'
# Name of the animated gif, written to the current working directory.
OUT_GIF = 'svr.gif'
def compute_coefs(X, y, verbose=True):
    """Fit a linear SVR for a sweep of tube widths and collect the results.

    One linear-kernel ``SVR`` with ``C=1.0`` is fitted to ``(X, y)`` for each
    of 100 ``epsilon`` values evenly spaced from 3 down to 0.001.

    Parameters
    ----------
    X : array, one row per sample and a single feature column
        (the fitted line is evaluated at the one-feature points 0 and 15).
    y : array of targets; the first item is treated as the outlier.
    verbose : bool
        When true, print progress for every epsilon value.

    Returns
    -------
    list of tuples ``(eps, f(0), f(15), support, mse_outlier, mse)`` where
    ``f(0)`` and ``f(15)`` are the 1-element arrays returned by ``predict``,
    ``support`` is ``svr.support_``, ``mse_outlier`` is the mean squared
    error over all samples (outlier included) and ``mse`` is the error with
    the first sample left out.
    """
    if verbose:
        print("Computing regression results...")
    coefs = []  # list of (eps, intercept, f(15), support, mse_outlier, mse)
    for eps in np.linspace(3, 0.001, 100):
        if verbose:
            print("eps=%2.2f" % eps)
        svr = SVR(C=1.0, epsilon=eps, kernel='linear').fit(X, y)
        y_pred = svr.predict(X)
        mse_outlier = mean_squared_error(y, y_pred)
        mse = mean_squared_error(y[1:], y_pred[1:])  # outlier is first item
        # predict() expects a 2D (n_samples, n_features) input; a bare scalar
        # is rejected by current scikit-learn, so wrap each endpoint.
        intercept = svr.predict(np.array([[0.0]]))
        f_15 = svr.predict(np.array([[15.0]]))
        coefs.append((eps, intercept, f_15, svr.support_, mse_outlier, mse))
    return coefs
def plot_coefs(X, y, coefs, verbose=True, noise=False):
    """Render one PNG frame per fitted model and assemble them into a gif.

    Frames are written to ``IMG_DIR`` (created if missing) and then combined
    into ``OUT_GIF`` by shelling out to ImageMagick's ``convert``.

    Parameters
    ----------
    X, y : arrays holding the data points to scatter-plot.
    coefs : iterable of ``(eps, intercept, f_15, support, mse_outlier, mse)``
        tuples, as produced by ``compute_coefs``.
    verbose : bool
        When true, print progress messages.
    noise : bool
        When true, standard-normal noise is added to the plotted ``y``
        values. The caller's array is left untouched.

    Raises
    ------
    RuntimeError
        If the ``convert`` command exits with a non-zero status.
    """
    if verbose:
        print('Plotting results...')
    if not os.path.exists(IMG_DIR):
        os.makedirs(IMG_DIR)
    if noise:
        # Rebind to a new array: an in-place ``+=`` would silently modify the
        # array owned by the caller.
        y = y + np.random.randn(*y.shape)
    for i, (eps, intercept, f_15, support, mse_outlier, mse) in enumerate(coefs):
        fig = pl.figure(figsize=(6, 4))
        # circle the support vectors
        pl.scatter(X[support], y[support], s=75, c='r', edgecolors='r',
                   facecolors='none', linewidths=2)
        # plot all points
        pl.scatter(X, y, s=40, c='r')
        pl.xlim((2, 15))
        pl.ylim((4, 14))
        # The fitted line is drawn through its values at x=0 and x=15.
        pl.plot((0, 15), (intercept, f_15))
        # "MSE" shows the error over all points, "MSE_out" the error with the
        # first (outlier) point left out.
        pl.title('SVR regression on Anscombe\'s third dataset\n'
                 '$\\epsilon=%2.2f$, $MSE=%2.2f$, $MSE_{out}=%2.2f$'
                 % (eps, mse_outlier, mse),
                 size=15)
        # Zero-padded names keep the frames in order when the shell expands
        # the ``*.png`` glob below.
        filename = '%02d.png' % i
        pl.subplots_adjust(left=.07, bottom=.07, right=.94, top=.85,
                           wspace=.2, hspace=.5)
        pl.savefig(os.path.join(IMG_DIR, filename))
        # Release the figure; otherwise every frame stays open in memory.
        pl.close(fig)
    if verbose:
        print('Creating animated gif...')
    err = os.system('convert -delay %d %s %s' % (
        DELAY,
        os.path.join(IMG_DIR, '*.png'),
        OUT_GIF))
    if err:
        raise RuntimeError('Didn\'t manage to run ImageMagick. Check that '
                           'the \'convert\' command is in your path.')
if __name__ == '__main__':
    # Fit the models for every epsilon first, then draw the frames and
    # build the animation from them.
    results = compute_coefs(X, y)
    plot_coefs(X, y, results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment