Last active
August 29, 2015 14:20
-
-
Save kingjr/cdc853e09aae9dde537c to your computer and use it in GitHub Desktop.
This aims at reproducing the sklearn.svm.SVC object without having to store 'support_vectors_' and '_dual_coef_'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author: Jean-Remi King <[email protected]>
#
# License: BSD (3-clause)
import warnings | |
import numpy as np | |
import scipy.sparse as sp | |
from sklearn.svm import SVC, LinearSVC | |
from sklearn.datasets import make_classification | |
from sklearn.calibration import CalibratedClassifierCV | |
# CLASSIFIERS
def SVC_Light(probability=False, method='sigmoid', cv=5, **kwargs):
    """Build a linear SVC that drops its support vectors once fitted.

    Similar to ``SVC(kernel='linear')`` without having to store
    ``support_vectors_`` and ``_dual_coef_``.
    Uses a ``CalibratedClassifierCV`` subclass if ``probability`` is truthy.

    Parameters
    ----------
    probability : bool
        When truthy, return a ``_SVC_Light_Proba`` wrapping a ``_SVC_Light``
        built with ``probability=True``; otherwise return a bare
        ``_SVC_Light``.
    method : str
        Forwarded as ``method`` to ``_SVC_Light_Proba``. Ignored when
        ``probability`` is falsy.
    cv : int or cross-validation object
        Forwarded as ``cv`` to ``_SVC_Light_Proba``. Ignored when
        ``probability`` is falsy.
    **kwargs
        Forwarded unchanged to ``_SVC_Light``.

    Returns
    -------
    _SVC_Light or _SVC_Light_Proba
        An unfitted estimator.
    """
    # Truthiness rather than `is True`, so flags such as numpy.True_ or 1 are
    # honoured instead of silently falling through to the non-probabilistic
    # branch.
    if probability:
        base_estimator = _SVC_Light(probability=True, **kwargs)
        # The wrapped estimator is passed positionally: it is the first
        # constructor argument of CalibratedClassifierCV, whereas its keyword
        # name differs between scikit-learn releases (`base_estimator` in
        # older ones, `estimator` in newer ones).
        return _SVC_Light_Proba(base_estimator, method=method, cv=cv)
    return _SVC_Light(**kwargs)
class _SVC_Light_Proba(CalibratedClassifierCV):
    """Calibrated wrapper used by ``SVC_Light`` when probabilities are needed.

    Only binary problems are supported: ``fit`` rejects targets with more
    than two distinct values.
    """

    def decision_function(self, X):
        """Return ``self.predict_proba(X)`` after emitting a warning.

        The warning tells the caller that, for this wrapper, the decision
        function is the calibrated probability rather than a signed distance.
        """
        warnings.warn(
            "With 'probability=True' decision_function=predict_proba")
        return self.predict_proba(X)

    def fit(self, X, y):
        """Fit the calibrated classifier on a binary problem.

        Parameters
        ----------
        X : array-like
            Training data, forwarded to ``CalibratedClassifierCV.fit``.
        y : array-like
            Target labels; must contain at most two distinct values.

        Returns
        -------
        self : _SVC_Light_Proba
            The fitted estimator, so that calls can be chained as with other
            scikit-learn estimators.

        Raises
        ------
        ValueError
            If ``y`` contains more than two distinct values.
        """
        if len(np.unique(y)) > 2:
            # XXX multiclass calibration is not handled yet.
            raise ValueError('_SVC_Light currently does not support '
                             'probability=True for more than 2 classes.')
        super(_SVC_Light_Proba, self).fit(X, y)
        # The original returned None here, which breaks the estimator
        # convention that `fit` returns the estimator itself.
        return self
class _SVC_Light(SVC):
    """Linear SVC that keeps only its primal coefficients after fitting.

    Similar to ``SVC(kernel='linear')`` without having to store
    ``support_vectors_`` and ``_dual_coef_``: the weight vector is computed
    once in ``fit`` and both attributes are then deleted.

    NOTE(review): extra SVC options travel through ``**kwargs``; confirm they
    survive ``sklearn.base.clone``, which rebuilds estimators from the
    ``__init__`` signature.
    """

    def __init__(self, kernel='linear', probability=False, **kwargs):
        """Create the estimator.

        Parameters
        ----------
        kernel : str
            Must be ``'linear'``.
        probability : bool
            Forwarded to ``SVC.__init__``.
        **kwargs
            Any other ``SVC`` constructor argument.

        Raises
        ------
        ValueError
            If ``kernel`` is not ``'linear'``.
        """
        # `kernel` is a named parameter, so it can never appear in **kwargs;
        # its value has to be validated directly. The former
        # `'kernel' in kwargs` test could not fire and let non-linear kernels
        # through.
        if kernel != 'linear':
            raise ValueError('SVC_Light is only available when using a '
                             'linear kernel.')
        super(_SVC_Light, self).__init__(kernel=kernel,
                                         probability=probability, **kwargs)

    def fit(self, X, y, scaling=None):
        """Fit the SVC, cache its coefficients and drop the support vectors.

        Parameters
        ----------
        X : array-like
            Training data, forwarded to ``SVC.fit``.
        y : array-like
            Target labels, forwarded to ``SVC.fit``.
        scaling : object, optional
            Unused; kept so existing callers passing it keep working.

        Returns
        -------
        self : _SVC_Light
            The fitted estimator.
        """
        super(_SVC_Light, self).fit(X, y)
        # Compute coef from the support vectors once only; they must still be
        # present at this point, hence the deletion comes afterwards.
        self._coef_ = self._compute_coef_()
        del self.support_vectors_
        del self._dual_coef_
        return self

    def _compute_coef_(self):
        """Return the result of ``self._get_coef()`` marked read-only.

        Originally the body of the ``coef_`` property of SVC.
        """
        coef = self._get_coef()
        if sp.issparse(coef):
            coef.data.flags.writeable = False
        else:
            coef.flags.writeable = False
        return coef

    def predict(self, X):
        """Predict labels by one-vs-one voting over the decision values."""
        return predict_OneVsOne(self.decision_function(X), self.classes_)

    def decision_function(self, X):
        """Compute ``coef_ . x + intercept_`` for every sample of ``X``.

        Returns
        -------
        distances : ndarray
            One row per sample and one column per row of ``coef_``. With
            exactly two classes the sign of the result is flipped.
        """
        X = self._validate_for_predict(X)
        # Broadcasting adds the intercept to every row; no explicit tiling of
        # `intercept_` is needed.
        distances = np.dot(self.coef_, X.T).T + self.intercept_
        if len(self.classes_) == 2:
            # NOTE(review): presumably mirrors the binary sign convention of
            # SVC.decision_function in the scikit-learn release this was
            # written against - confirm for the installed version.
            distances *= -1
        return distances

    @property
    def coef_(self):
        """Coefficients cached by ``fit``."""
        return self._coef_
# PREDICTORS
def predict_OneVsOne(confidence, classes):
    """Turn pairwise (one-vs-one) decision values into class predictions.

    For SVC, NuSVC.

    Column ``k`` of ``confidence`` holds the decision values of the ``k``-th
    class pair, pairs being enumerated as (0, 1), (0, 2), ..., (1, 2), ...
    over the positions in ``classes``. A negative value is a vote for the
    second class of the pair, any other value a vote for the first. Each
    sample is assigned the class with the most votes; ties go to the class
    that comes first in ``classes``.

    Parameters
    ----------
    confidence : ndarray, shape (n_samples, n_pairs)
        Pairwise decision values.
    classes : array-like, shape (n_classes,)
        Class labels. Any dtype is accepted, including strings.

    Returns
    -------
    y_pred : ndarray, shape (n_samples,)
        Predicted label of each sample.

    Raises
    ------
    ValueError
        If the number of columns of ``confidence`` is not
        ``n_classes * (n_classes - 1) / 2``.
    """
    confidence = np.asarray(confidence)
    classes = np.asarray(classes)
    n_samples, n_pairs = confidence.shape
    n_classes = len(classes)

    # Row-major upper-triangle indices give the pairs in the same order as
    # the nested "for i / for j > i" enumeration.
    first, second = np.triu_indices(n_classes, k=1)
    if n_pairs != len(first):
        raise ValueError(
            'confidence has %d columns but %d classes require %d pairwise '
            'columns.' % (n_pairs, n_classes, len(first)))

    # Votes are tallied per class *index* rather than stored as labels in a
    # float array, so non-numeric labels work as well.
    second_wins = confidence < 0
    tally = np.zeros((n_samples, n_classes), dtype=int)
    for k, (i, j) in enumerate(zip(first, second)):
        tally[:, j] += second_wins[:, k]
        tally[:, i] += ~second_wins[:, k]

    # argmax returns the first maximum, i.e. ties go to the earliest class.
    return classes[tally.argmax(axis=1)]
def predict_OneVsRest(confidence, classes):
    """Pick, for every sample, the class with the highest confidence.

    For LinearSVC.

    Parameters
    ----------
    confidence : array-like, shape (n_samples, n_classes)
        Per-class scores; column ``c`` belongs to ``classes[c]``.
    classes : array-like, shape (n_classes,)
        Class labels. Plain lists are accepted as well as arrays.

    Returns
    -------
    y_pred : ndarray, shape (n_samples,)
        Label with the largest score in each row; on ties the first such
        column wins.
    """
    # Coerce to arrays so that list inputs support fancy indexing.
    best = np.asarray(confidence).argmax(axis=1)
    return np.asarray(classes)[best]
# Demo: fit the reference estimators and the light variants on the same data
# and print their training accuracies side by side.

# setup dataset --------------------------------------------------------------
X, y = make_classification(n_informative=10, n_classes=2)

# 1. Classic pipeline --------------------------------------------------------
svc = SVC(kernel='linear')
svc.fit(X, y)
y_pred = svc.decision_function(X)
score = svc.score(X, y)

# 2. Linear SVC ---------------------------------------------------------------
linearsvc = LinearSVC()
linearsvc.fit(X, y)
# NOTE: rebinds `y_pred`; from here on it holds the LinearSVC decision values.
y_pred = linearsvc.decision_function(X)
score_linear = linearsvc.score(X, y)

# 3. Light SVC ----------------------------------------------------------------
svc_light = SVC_Light(kernel='linear')
svc_light.fit(X, y)
y_pred_light = svc_light.predict(X)
score_light = svc_light.score(X, y)

# 4. Light SVC Proba-----------------------------------------------------------
svc_light_proba = SVC_Light(kernel='linear', probability=True)
svc_light_proba.fit(X, y)
# Probabilities get their own name so the labels computed on the next line do
# not overwrite them.
y_proba_light_proba = svc_light_proba.predict_proba(X)
y_pred_light_proba = svc_light_proba.predict(X)
score_light_proba = svc_light_proba.score(X, y)

print([score, score_linear, score_light, score_light_proba])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment