Skip to content

Instantly share code, notes, and snippets.

@chasedehan
Created May 6, 2025 22:06
Show Gist options
  • Save chasedehan/decd1d741ffe53dd5fbfa6284f419461 to your computer and use it in GitHub Desktop.
Save chasedehan/decd1d741ffe53dd5fbfa6284f419461 to your computer and use it in GitHub Desktop.
import random
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression, make_classification
from sklearn.linear_model import LogisticRegression
class ChaseEstimator(BaseEstimator, RegressorMixin):
    """Ordinary least-squares linear regression, reimplemented as an example.

    The estimator exists so that a hand-written model can be used inside a
    scikit-learn ``Pipeline``, which can then be the ``self.model`` that is
    exported.  It implements ``predict`` because ``predict_proba`` is not an
    option for linear regression; which method matters depends on what the
    prediction service actually calls.

    Attributes
    ----------
    coef_ : ndarray or None
        Fitted feature weights; ``None`` until ``fit`` has been called.
    intercept_ : float, ndarray or None
        Fitted bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        # Both stay None until fit() runs; predict() uses that to detect an
        # unfitted instance.
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X, y):
        """Fit the intercept and coefficients by least squares.

        Parameters
        ----------
        X : array-like, 2-D (one row per sample)
            Training features.
        y : array-like
            Training targets, one entry (or row) per sample.

        Returns
        -------
        self
            The fitted estimator, so calls can be chained.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        # Prepend a column of ones so the first solved parameter is the bias.
        X_b = np.c_[np.ones((X.shape[0], 1)), X]
        # Solve the least-squares problem on X_b directly rather than via
        # pinv(X_b.T @ X_b) @ X_b.T @ y: forming X_b.T @ X_b squares the
        # condition number of the design matrix and loses precision.
        theta_best, *_ = np.linalg.lstsq(X_b, y, rcond=None)
        self.intercept_ = theta_best[0]
        self.coef_ = theta_best[1:]
        return self

    def predict(self, X):
        """Return ``intercept_ + X @ coef_`` for the given samples.

        Parameters
        ----------
        X : array-like
            Samples with the same number of features used in ``fit``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        if self.coef_ is None or self.intercept_ is None:
            # Imported locally so this block does not depend on a new
            # file-level import.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This ChaseEstimator instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )
        X = np.asarray(X)
        return self.intercept_ + X @ self.coef_
class ChaseClassifier(BaseEstimator, ClassifierMixin):
    """Placeholder two-column classifier that returns random probabilities.

    Nothing is learned from the training data: ``fit`` is a no-op and
    ``predict_proba`` emits a random ``[r, 1 - r]`` pair for every input row.
    The estimator exists so that it can be used inside a ``Pipeline``, which
    can then be the ``self.model`` that is exported.  It implements
    ``predict_proba`` (and no ``predict``); which method matters depends on
    what the prediction service actually calls.
    """

    def __init__(self):
        # fit() never assigns these, so they remain None.
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X, y):
        """Accept the training data and return ``self`` without learning.

        NOTE: no ``classes_`` attribute is recorded, so scikit-learn helpers
        that need the class labels cannot be used with this estimator.
        """
        return self

    def predict_proba(self, X):
        """Return one random ``[r, 1 - r]`` row per sample.

        Parameters
        ----------
        X : array-like
            Input samples; only the number of rows is used.

        Returns
        -------
        ndarray of shape (n_samples, 2)
            ``r`` is drawn with ``random.random()`` (one draw per row, in row
            order), so seeding the ``random`` module makes it reproducible.
        """
        # np.shape reads X.shape when present and otherwise converts X, so
        # plain sequences work as well as arrays.
        n_rows = np.shape(X)[0]
        draws = [random.random() for _ in range(n_rows)]
        # Return an ndarray so that array indexing such as proba[:, 1] works;
        # reshape keeps the (0, 2) shape for empty input.
        return np.array(
            [[r, 1 - r] for r in draws], dtype=float
        ).reshape(n_rows, 2)
# --- Regression: the custom estimator as the final pipeline step ---------
X, y = make_regression(n_samples=100, n_features=2, noise=0.1, random_state=42)
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("chase_estimator", ChaseEstimator()),
    ]
)
pipeline.fit(X, y)
pipeline.predict(X)  # regression task: the output is a 1-D array

# --- Classification: reference pipeline using LogisticRegression ---------
X, y = make_classification(
    n_samples=200,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    random_state=42,
)
pipe2 = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("lr", LogisticRegression()),
    ]
)
pipe2.fit(X, y)
pipe2.predict_proba(X)  # works; the output is a 2-D array

# --- Classification: the custom classifier as the final step -------------
pipe3 = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("cc", ChaseClassifier()),
    ]
)
pipe3.fit(X, y)
# Works because ChaseClassifier defines predict_proba; without that method
# this call breaks.
pipe3.predict_proba(X)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment