@tomquisel
Last active August 29, 2024 17:40
Version of scikit-learn's VotingClassifier that uses prefit models rather than requiring a refit.
import numpy as np
from sklearn.utils.validation import check_is_fitted


class VotingClassifier(object):
    """Stripped-down version of VotingClassifier that uses prefit estimators."""

    def __init__(self, estimators, voting='hard', weights=None):
        # estimators is a list of (name, fitted_estimator) tuples, as in sklearn
        self.estimators = [e[1] for e in estimators]
        self.named_estimators = dict(estimators)
        self.voting = voting
        self.weights = weights

    def fit(self, X, y, sample_weight=None):
        # The estimators are expected to be fitted already, so refitting is unsupported.
        raise NotImplementedError

    def predict(self, X):
        """Predict class labels for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Input samples, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        maj : array-like, shape = [n_samples]
            Predicted class labels.
        """
        check_is_fitted(self, 'estimators')
        if self.voting == 'soft':
            maj = np.argmax(self.predict_proba(X), axis=1)
        else:  # 'hard' voting
            predictions = self._predict(X)
            maj = np.apply_along_axis(
                lambda x: np.argmax(np.bincount(x, weights=self.weights)),
                axis=1,
                arr=predictions.astype('int'))
        return maj

    def _collect_probas(self, X):
        """Collect results from clf.predict_proba calls."""
        return np.asarray([clf.predict_proba(X) for clf in self.estimators])

    def _predict_proba(self, X):
        """Predict class probabilities for X in 'soft' voting."""
        if self.voting == 'hard':
            raise AttributeError("predict_proba is not available when"
                                 " voting=%r" % self.voting)
        check_is_fitted(self, 'estimators')
        avg = np.average(self._collect_probas(X), axis=0, weights=self.weights)
        return avg

    @property
    def predict_proba(self):
        """Compute probabilities of possible outcomes for samples in X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Input samples, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        avg : array-like, shape = [n_samples, n_classes]
            Weighted average probability for each class per sample.
        """
        return self._predict_proba

    def transform(self, X):
        """Return class labels or probabilities for X for each estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Input samples, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        If `voting='soft'`:
            array-like = [n_classifiers, n_samples, n_classes]
            Class probabilities calculated by each classifier.
        If `voting='hard'`:
            array-like = [n_samples, n_classifiers]
            Class labels predicted by each classifier.
        """
        check_is_fitted(self, 'estimators')
        if self.voting == 'soft':
            return self._collect_probas(X)
        else:
            return self._predict(X)

    def _predict(self, X):
        """Collect results from clf.predict calls."""
        return np.asarray([clf.predict(X) for clf in self.estimators]).T
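
For reference, here is a minimal usage sketch. The dataset, estimators, and weights are hypothetical, chosen only to illustrate the prefit pattern; any already-fitted classifiers with compatible predict/predict_proba methods should work.

# Hypothetical usage sketch: fit the base models separately, then combine them
# with the prefit VotingClassifier above. Names and parameters are illustrative.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=500, n_features=10, random_state=0)

lr = LogisticRegression(max_iter=1000).fit(X, y)
rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X, y)

# No call to .fit() on the ensemble itself; the estimators are already fitted.
voter = VotingClassifier(estimators=[('lr', lr), ('rf', rf)],
                         voting='soft', weights=[1, 2])
print(voter.predict(X[:5]))
print(voter.predict_proba(X[:5]))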
alik604 commented Nov 11, 2019

Thank you, I think I'll be able to use multithreading and speed up my testing (pre-fitting models in parallel)
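
A rough sketch of that idea, assuming joblib is available; the estimators and dataset below are hypothetical. The base models are fitted in parallel and the returned fitted models are handed straight to the prefit VotingClassifier defined above.

# Hypothetical sketch: pre-fit base models in parallel with joblib, then
# combine the already-fitted models with the gist's VotingClassifier.
from joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=500, n_features=10, random_state=0)

def fit_one(name, clf, X, y):
    # clone() gives each worker its own unfitted copy; fit() returns the fitted model
    return name, clone(clf).fit(X, y)

base = [('lr', LogisticRegression(max_iter=1000)),
        ('rf', RandomForestClassifier(n_estimators=100, random_state=0))]

fitted = Parallel(n_jobs=-1)(delayed(fit_one)(name, clf, X, y) for name, clf in base)
voter = VotingClassifier(estimators=fitted, voting='soft')
print(voter.predict(X[:5]))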
