Created
May 6, 2025 22:06
-
-
Save chasedehan/decd1d741ffe53dd5fbfa6284f419461 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import numpy as np | |
from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin | |
from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.datasets import make_regression, make_classification | |
from sklearn.linear_model import LogisticRegression | |
class ChaseEstimator(BaseEstimator, RegressorMixin):
    """Ordinary least-squares linear regression, re-implemented as an example.

    The estimator exists so that a hand-written model can be used as the
    final step of a ``Pipeline``; that pipeline can then be the model object
    that gets exported.  Only ``predict`` is provided (a linear regressor has
    no ``predict_proba``), so whether this is sufficient depends on which
    method the prediction service actually calls.

    Attributes:
        coef_: Weights for each feature column; ``None`` until ``fit`` runs.
        intercept_: Bias term; ``None`` until ``fit`` runs.
    """

    def __init__(self):
        # NOTE(review): scikit-learn's convention is to create
        # trailing-underscore attributes only inside fit().  They are kept
        # here so that reading them before fitting still yields None.
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X, y):
        """Fit the linear model by least squares.

        Args:
            X: Array-like of training features, one row per sample.
            y: Array-like of training targets, one entry per sample.

        Returns:
            This estimator, so calls can be chained.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        # Prepend a column of ones so the first solved weight is the bias.
        design = np.c_[np.ones((X.shape[0], 1)), X]
        # Apply the pseudo-inverse to the design matrix itself instead of
        # solving the normal equations pinv(A.T @ A) @ A.T @ y: the solution
        # is the same, but forming A.T @ A squares the condition number and
        # loses precision on correlated or badly scaled features.
        theta = np.linalg.pinv(design) @ y
        self.intercept_ = theta[0]
        self.coef_ = theta[1:]
        return self

    def predict(self, X):
        """Return ``intercept_ + X @ coef_`` for the rows of ``X``.

        Args:
            X: Array-like of features with the same columns used in ``fit``.

        Returns:
            The predicted target for each row of ``X``.
        """
        X = np.asarray(X)
        return self.intercept_ + X @ self.coef_
class ChaseClassifier(BaseEstimator, ClassifierMixin):
    """Placeholder classifier whose "probabilities" are random numbers.

    Nothing is learned from the training data.  The class only demonstrates
    that a custom estimator exposing ``predict_proba`` can be the final step
    of a ``Pipeline`` (which can then be the exported model); whether
    ``predict_proba`` is the method that matters depends on what the
    prediction service calls.

    Attributes:
        coef_: Unused placeholder, always ``None``.
        intercept_: Unused placeholder, always ``None``.
        classes_: Sorted unique labels seen by ``fit`` (set by ``fit``).
    """

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X, y):
        """Record the class labels; no model parameters are estimated.

        Args:
            X: Training features (ignored).
            y: Training labels.

        Returns:
            This estimator, so calls can be chained.
        """
        # Expose the labels the way scikit-learn classifiers do, so tools
        # that read ``classes_`` from the final pipeline step can find them.
        self.classes_ = np.unique(np.asarray(y))
        return self

    def predict_proba(self, X):
        """Return one random two-column probability row per sample.

        Args:
            X: Samples to score; only the number of rows is used.

        Returns:
            ``numpy.ndarray`` of shape ``(n_samples, 2)`` where each row is
            ``[r, 1 - r]`` with ``r`` drawn from ``random.random()``.  The
            output always has two columns, regardless of how many labels
            ``fit`` saw.
        """
        # Accept plain sequences as well as objects that expose ``shape``.
        n_samples = X.shape[0] if hasattr(X, "shape") else len(X)
        first = np.fromiter(
            (random.random() for _ in range(n_samples)),
            dtype=float,
            count=n_samples,
        )
        # Return a 2-D array (not a list of lists) so callers can slice it
        # with e.g. ``proba[:, 1]`` like any other classifier's output.
        return np.column_stack((first, 1.0 - first))
# --- Regression: the hand-written estimator as the last pipeline step ------
X, y = make_regression(n_samples=100, n_features=2, noise=0.1, random_state=42)
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("chase_estimator", ChaseEstimator()),
    ]
).fit(X, y)
pipeline.predict(X)  # regression: one prediction per sample (1-D)

# --- Classification: reference pipeline built on LogisticRegression --------
X, y = make_classification(
    n_samples=200,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    random_state=42,
)
pipe2 = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("lr", LogisticRegression()),
    ]
).fit(X, y)
pipe2.predict_proba(X)  # works; one row of class probabilities per sample

# --- Classification: same pipeline shape with the custom classifier --------
pipe3 = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("cc", ChaseClassifier()),
    ]
).fit(X, y)
# Works only because ChaseClassifier defines predict_proba; without that
# method this call would fail.
pipe3.predict_proba(X)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment