Created
May 27, 2012 19:21
-
-
Save vene/2815589 to your computer and use it in GitHub Desktop.
Support vector regression on Anscombe's third dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
import matplotlib.pylab as pl | |
from sklearn.svm import SVR | |
from sklearn.metrics import mean_squared_error | |
# Dataset no. 3 from Anscombe's quartet.  The outlier has been moved to the
# first position for prettier code.  This toy dataset illustrates the effect
# of outliers and assumptions when analyzing data using descriptive
# statistics; this script shows the effect of the SVR tube width when
# fitting a regression line.
X = np.array([[13.],  # the outlier (paired with y[0] == 12.74)
              [10.],
              [8.],
              [9.],
              [11.],
              [14.],
              [6.],
              [4.],
              [12.],
              [7.],
              [5.]])

# Targets, in the same row order as X.
y = np.array([12.74, 7.46, 6.77, 7.11, 7.81, 8.84, 6.08, 5.39,
              8.15, 6.42, 5.73])
# Pause between gif frames.  The value is handed to ImageMagick's ``-delay``
# option, which counts in ticks of 1/100 s by default (so 10 == 0.1 s).
DELAY = 10

# Directory that receives the individual PNG frames.
IMG_DIR = "imgs_svr"

# Name of the assembled animation (written to the current folder).
OUT_GIF = "svr.gif"
def compute_coefs(X, y, verbose=True):
    """Fit a linear SVR for a sweep of tube widths and collect the results.

    ``epsilon`` takes 100 evenly spaced values from 3 down to 0.001 while
    ``C`` stays fixed at 1.0.

    Parameters
    ----------
    X : array, shape (n_samples, 1)
        Inputs.  The first row is treated as the outlier.
    y : array, shape (n_samples,)
        Targets, in the same order as ``X``.
    verbose : bool
        If true, print a progress line per fitted model.

    Returns
    -------
    coefs : list of tuples
        One ``(eps, intercept, f_15, support, mse_outlier, mse)`` entry per
        epsilon, where ``intercept`` and ``f_15`` are the one-element
        prediction arrays at x=0 and x=15, ``support`` is ``svr.support_``,
        ``mse_outlier`` is the MSE over all points and ``mse`` is the MSE
        with the first (outlier) point left out.
    """
    if verbose:
        # Single-argument call form prints the same on Python 2 and 3.
        print("Computing regression results...")

    # predict() expects a 2-D (n_samples, n_features) array; a bare scalar
    # is rejected by current scikit-learn releases.
    x_left = [[0.0]]
    x_right = [[15.0]]

    coefs = []  # list of (eps, intercept, f(15), support, mse_outlier, mse)
    for eps in np.linspace(3, 0.001, 100):
        if verbose:
            print("eps=%2.2f" % eps)
        svr = SVR(C=1.0, epsilon=eps, kernel='linear').fit(X, y)
        y_pred = svr.predict(X)
        mse_outlier = mean_squared_error(y, y_pred)
        mse = mean_squared_error(y[1:], y_pred[1:])  # outlier is first item
        coefs.append((eps, svr.predict(x_left), svr.predict(x_right),
                      svr.support_, mse_outlier, mse))
    return coefs
def plot_coefs(X, y, coefs, verbose=True, noise=False):
    """Render one PNG frame per regression result and join them into a gif.

    Frames are written to ``IMG_DIR`` (created if missing) as ``NN.png`` and
    then combined into ``OUT_GIF`` by ImageMagick's ``convert`` command.

    Parameters
    ----------
    X : array
        Inputs that were used for fitting.
    y : array
        Targets that were used for fitting.  Not modified by this function.
    coefs : iterable of tuples
        ``(eps, intercept, f_15, support, mse_outlier, mse)`` entries; one
        frame is drawn for each.  The fitted line is drawn from
        ``(0, intercept)`` to ``(15, f_15)``.
    verbose : bool
        If true, print progress messages.
    noise : bool
        If true, add standard-normal noise to the plotted targets.

    Raises
    ------
    RuntimeError
        If the ``convert`` command returns a non-zero status.
    """
    if verbose:
        print('Plotting results...')
    if not os.path.exists(IMG_DIR):
        os.makedirs(IMG_DIR)
    if noise:
        # Build a new array rather than using ``+=`` so the caller's data
        # is not modified in place.
        y = y + np.random.randn(*y.shape)

    for i, (eps, intercept, f_15, support, mse_outlier, mse) in enumerate(coefs):
        pl.figure(figsize=(6, 4))

        # circle the support vectors; ``c`` is deliberately not passed here
        # because newer matplotlib lets it override ``facecolors`` and the
        # rings would be drawn filled.
        pl.scatter(X[support], y[support], s=75, edgecolors='r',
                   facecolors='none', linewidths=2)

        # plot all points
        pl.scatter(X, y, s=40, c='r')
        pl.xlim((2, 15))
        pl.ylim((4, 14))
        pl.plot((0, 15), (intercept, f_15))

        # The "MSE" label shows mse_outlier and "MSE_{out}" shows mse.
        pl.title('SVR regression on Anscombe\'s third dataset\n'
                 '$\\epsilon=%2.2f$, $MSE=%2.2f$, $MSE_{out}=%2.2f$'
                 % (eps, mse_outlier, mse),
                 size=15)
        filename = '%02d.png' % i
        pl.subplots_adjust(.07, .07, .94, .85, .2, .5)
        pl.savefig(os.path.join(IMG_DIR, filename))
        # Release the figure; otherwise every frame stays open in memory.
        pl.close()

    if verbose:
        print('Creating animated gif...')
    # NOTE: the glob matches every PNG in IMG_DIR, including frames left
    # over from an earlier run.
    err = os.system('convert -delay %d %s %s' % (
        DELAY,
        os.path.join(IMG_DIR, '*.png'),
        OUT_GIF))
    if err:
        raise RuntimeError('Didn\'t manage to run ImageMagick. Check that '
                           'the \'convert\' command is in your path.')
if __name__ == '__main__':
    # Script entry point: run the epsilon sweep, then draw the frames and
    # assemble the animation from its results.
    results = compute_coefs(X, y)
    plot_coefs(X, y, results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment