Last active
August 29, 2024 17:40
-
-
Save tomquisel/a421235422fdf6b51ec2ccc5e3dee1b4 to your computer and use it in GitHub Desktop.
Version of scikit-learn's VotingClassifier that uses prefit models rather than requiring a refit.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class VotingClassifier(object):
    """Stripped-down version of VotingClassifier that uses prefit estimators.

    The supplied estimators are never cloned or refit; they are only queried
    through ``predict`` / ``predict_proba`` and their answers are combined.

    Parameters
    ----------
    estimators : iterable of (str, estimator) tuples
        Already-fitted classifiers, each paired with a name.
    voting : str, {'hard', 'soft'} (default='hard')
        'hard' takes a (weighted) majority vote over predicted labels;
        'soft' takes the argmax of the (weighted) average of the predicted
        class probabilities.
    weights : array-like, shape = [n_classifiers], optional (default=None)
        One weight per estimator. ``None`` means uniform weights.

    Attributes
    ----------
    estimators : list of estimators
        The estimators, in the order given.
    named_estimators : dict
        Maps each name to its estimator.
    """

    def __init__(self, estimators, voting='hard', weights=None):
        # Materialise once so a one-shot iterable feeds both attributes.
        estimators = list(estimators)
        self.estimators = [e[1] for e in estimators]
        self.named_estimators = dict(estimators)
        self.voting = voting
        self.weights = weights

    def fit(self, X, y, sample_weight=None):
        """Not supported: this ensemble only combines prefit estimators.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError(
            "VotingClassifier uses prefit estimators and cannot be fit")

    def _classes(self):
        """Return the ``classes_`` of the first estimator exposing it.

        Returns ``None`` when no estimator has a ``classes_`` attribute.
        NOTE(review): assumes every estimator was fitted on the same label
        set and that ``classes_`` follows the column order of
        ``predict_proba`` (the scikit-learn convention) -- confirm for
        custom estimators.
        """
        for clf in self.estimators:
            classes = getattr(clf, 'classes_', None)
            if classes is not None:
                return np.asarray(classes)
        return None

    def predict(self, X):
        """Predict class labels for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples to classify, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        maj : array-like, shape = [n_samples]
            Predicted class labels. For 'soft' voting, column indices are
            returned instead when no estimator exposes ``classes_``.
        """
        # No check_is_fitted here: `estimators` is always set by __init__, so
        # such a check could never fail. An unfitted member estimator raises
        # its own error when it is queried below.
        if self.voting == 'soft':
            winners = np.argmax(self.predict_proba(X), axis=1)
            classes = self._classes()
            # Translate probability-column indices back to real labels.
            return winners if classes is None else classes[winners]

        # 'hard' voting
        predictions = self._predict(X)
        # Encode the votes as 0..k-1 so np.bincount also works for string or
        # negative labels. `labels` is sorted, so ties still resolve to the
        # smallest label, exactly as bincount on raw non-negative ints did.
        labels, encoded = np.unique(predictions, return_inverse=True)
        encoded = encoded.reshape(predictions.shape)
        winners = np.apply_along_axis(
            lambda votes: np.argmax(np.bincount(votes,
                                                weights=self.weights)),
            axis=1,
            arr=encoded)
        return labels[winners]

    def _collect_probas(self, X):
        """Collect results from clf.predict_proba calls.

        Returns an array of shape [n_classifiers, n_samples, n_classes].
        """
        return np.asarray([clf.predict_proba(X) for clf in self.estimators])

    def _predict_proba(self, X):
        """Predict class probabilities for X in 'soft' voting.

        Raises
        ------
        AttributeError
            If ``voting`` is 'hard'.
        """
        if self.voting == 'hard':
            raise AttributeError("predict_proba is not available when"
                                 " voting=%r" % self.voting)
        avg = np.average(self._collect_probas(X), axis=0,
                         weights=self.weights)
        return avg

    @property
    def predict_proba(self):
        """Compute probabilities of possible outcomes for samples in X.

        Exposed as a property returning the bound implementation, so it is
        called like a method: ``clf.predict_proba(X)``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples to score, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        avg : array-like, shape = [n_samples, n_classes]
            Weighted average probability for each class per sample.
        """
        return self._predict_proba

    def transform(self, X):
        """Return class labels or probabilities for X for each estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples to transform, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        If `voting='soft'`:
          array-like = [n_classifiers, n_samples, n_classes]
            Class probabilities calculated by each classifier.
        If `voting='hard'`:
          array-like = [n_samples, n_classifiers]
            Class labels predicted by each classifier.
        """
        if self.voting == 'soft':
            return self._collect_probas(X)
        else:
            return self._predict(X)

    def _predict(self, X):
        """Collect results from clf.predict calls.

        Returns an array of shape [n_samples, n_classifiers].
        """
        return np.asarray([clf.predict(X) for clf in self.estimators]).T
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you, I think I'll be able to use multithreading and speed up my testing (pre-fitting models in parallel)