Skip to content

Instantly share code, notes, and snippets.

@GaelVaroquaux
Created September 24, 2012 13:08
Show Gist options
  • Save GaelVaroquaux/3775870 to your computer and use it in GitHub Desktop.
Save GaelVaroquaux/3775870 to your computer and use it in GitHub Desktop.
Benchmark of elastic net on a very sparse system
# Licence : BSD
# Author: Gael Varoquaux
from time import time
import numpy as np
import pylab as pl
from scipy import linalg, ndimage
from sklearn import linear_model
from sklearn.linear_model.coordinate_descent import elastic_net_strong_rule_active_set
from sklearn.linear_model.cd_fast import elastic_net_kkt_violating_features
from sklearn.utils import check_random_state
###############################################################################
# Fonction to generate data
def create_simulation_data(snr=5, n_samples=2 * 100, size=12, random_state=0):
generator = check_random_state(random_state)
roi_size = 3 # size / 3
smooth_X = 2
### Coefs
w = np.zeros((size, size, size))
w[0:roi_size, 0:roi_size, 0:roi_size] = -0.6
w[-roi_size:, -roi_size:, 0:roi_size] = 0.5
w[0:roi_size, -roi_size:, -roi_size:] = -0.6
w[-roi_size:, 0:roi_size:, -roi_size:] = 0.5
w = w.ravel()
### Images
XX = generator.randn(n_samples, size, size, size)
X = []
y = []
for i in range(n_samples):
Xi = ndimage.filters.gaussian_filter(XX[i, :, :, :], smooth_X)
Xi = Xi.ravel()
X.append(Xi)
y.append(np.dot(Xi, w))
X = np.array(X)
y = np.array(y)
norm_noise = linalg.norm(y, 2) / np.exp(snr / 20.)
orig_noise = generator.randn(y.shape[0])
noise_coef = norm_noise / linalg.norm(orig_noise, 2)
# Add additive noise
noise = noise_coef * orig_noise
snr = 20 * np.log(linalg.norm(y, 2) / linalg.norm(noise, 2))
print "SNR : %d " % snr
y += noise
X -= X.mean(axis=-1)[:, np.newaxis]
X /= X.std(axis=-1)[:, np.newaxis]
X_test = X[n_samples / 2:, :]
X_train = X[:n_samples / 2, :]
y_test = y[n_samples / 2:]
y = y[:n_samples / 2]
return X_train, X_test, y, y_test, snr, noise, w, size
def plot_slices(data, title=None):
pl.figure(figsize=(5.5, 2.2))
vmax = np.abs(data).max()
for i in (0, 6, 11):
pl.subplot(1, 3, i / 5 + 1)
pl.imshow(data[:, :, i], vmin=-vmax, vmax=vmax,
interpolation="nearest", cmap=pl.cm.RdBu_r)
pl.xticks(())
pl.yticks(())
pl.subplots_adjust(hspace=0.05, wspace=0.05, left=.03, right=.97)
if title is not None:
pl.suptitle(title)
###############################################################################
# Create data
X_train, X_test, y_train, y_test, snr, noise, coefs, size =\
create_simulation_data(snr=10, n_samples=400, size=60)
coefs = np.reshape(coefs, [size, size, size])
#plot_slices(coefs, title="Ground truth")
n_samples = len(X_train)
alpha = 1.
rho = 0.95
l1_reg = alpha * rho * n_samples
l2_reg = alpha * (1.0 - rho) * n_samples
###############################################################################
# Compute the results and estimated coef maps for different estimators
classifiers = [
#('enet', linear_model.ElasticNet(alpha=alpha, rho=rho)),
('enet_sr', linear_model.ElasticNet(alpha=alpha, rho=rho,
use_strong_rule=True)),
]
# Run the estimators
for name, classifier in classifiers:
times = list()
for _ in range(2):
t1 = time()
classifier.fit(X_train, y_train)
elapsed_time = time() - t1
times.append(elapsed_time)
coefs = classifier.coef_
coef_flat = np.ascontiguousarray(coefs.copy().ravel())
coefs = np.reshape(coefs, [size, size, size])
score = classifier.score(X_test, y_test)
title = '%s: prediction score %.3f, training time: %.4fs (%.4fs)' % (
classifier.__class__.__name__, score,
np.mean(times), np.std(times))
# We use the plot_slices function provided in the example to
# plot the results
#plot_slices(coefs, title=title)
print title
print 'Number of non-zero coefs', (np.abs(coefs) > 1e-9).sum()
print 'Number of coefs selected by the strong rules', \
np.sum(elastic_net_strong_rule_active_set(X_train,
y_train, alpha=alpha, rho=rho))
print 'Number of coefs selected by the strong rules with reinit', \
np.sum(elastic_net_strong_rule_active_set(X_train,
y_train, alpha=alpha, rho=rho,
coef_init=coef_flat, alpha_init=alpha))
kkt_violators = elastic_net_kkt_violating_features(
coef_flat, l1_reg, l2_reg, X_train, y_train)
print 'Number of violating features', len(kkt_violators)
pl.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment