chasedehan · May 6, 2025 22:06
diff --git a/pipeline with custom estimators b/pipeline with custom estimators
 import random

 import numpy as np
 from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.datasets import make_regression, make_classification
 from sklearn.linear_model import LogisticRegression


 class ChaseEstimator(RegressorMixin, BaseEstimator):
    """Ordinary least-squares linear regression, reimplemented as an example.

    The class exists to show a custom estimator that can be used as the final
    step of a ``Pipeline`` (which can then be the exported ``self.model``).
    It implements ``predict`` only; which method must exist depends on what
    the prediction service actually calls.

    The mixin is listed before ``BaseEstimator`` so the mixin's behaviour takes
    precedence in the MRO, as scikit-learn recommends. There are no
    hyperparameters, so no ``__init__`` is defined; the learned attributes
    ``intercept_`` and ``coef_`` exist only after ``fit`` so that an unfitted
    instance is not mistaken for a fitted one.
    """

    def fit(self, X, y):
        """Fit intercept and coefficients by least squares.

        Parameters
        ----------
        X : array-like, converted with ``np.asarray``
            Feature matrix; one row per sample.
        y : array-like, converted with ``np.asarray``
            Targets, one entry (or row) per sample.

        Returns
        -------
        self
        """
        X = np.asarray(X)
        y = np.asarray(y)
        # Prepend a column of ones so the first solved parameter is the bias.
        X_b = np.c_[np.ones((X.shape[0], 1)), X]
        # Solve the least-squares problem directly instead of inverting
        # X_b.T @ X_b: forming the normal equations squares the condition
        # number and loses precision on correlated features.
        theta_best, *_ = np.linalg.lstsq(X_b, y, rcond=None)
        self.intercept_ = theta_best[0]
        self.coef_ = theta_best[1:]
        return self

    def predict(self, X):
        """Return ``intercept_ + X @ coef_`` for the given samples.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        # Imported here so this class does not depend on an extra
        # module-level import being present.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, ["coef_", "intercept_"])
        X = np.asarray(X)
        return self.intercept_ + X @ self.coef_


 class ChaseClassifier(ClassifierMixin, BaseEstimator):
    """Placeholder two-class classifier that returns random probabilities.

    This is an example of a custom classifier that can be used as the final
    step of a ``Pipeline`` (which can then be the exported ``self.model``)
    when the prediction service calls ``predict_proba``. Nothing is learned
    from the features; the probabilities are drawn from ``random.random()``.

    The mixin is listed before ``BaseEstimator`` so the mixin's behaviour takes
    precedence in the MRO, as scikit-learn recommends. There are no
    hyperparameters, so no ``__init__`` is defined.
    """

    def fit(self, X, y):
        """Record the labels seen in ``y`` and return ``self``.

        ``X`` is accepted for API compatibility and is not used.
        """
        # scikit-learn classifiers expose the known labels as ``classes_``;
        # setting it here also marks the estimator as fitted.
        self.classes_ = np.unique(y)
        return self

    def predict_proba(self, X):
        """Return one random ``[p, 1 - p]`` pair per row of ``X``.

        Parameters
        ----------
        X : object with a ``shape`` attribute, or any sized sequence
            Only the number of rows is used.

        Returns
        -------
        list of list of float
            One two-element list per sample; each pair sums to 1.
        """
        # Fall back to len() so plain Python lists work as well as arrays.
        n_rows = X.shape[0] if hasattr(X, "shape") else len(X)
        draws = (random.random() for _ in range(n_rows))
        return [[p, 1 - p] for p in draws]


 # --- Regression: custom estimator as the last pipeline step ---------------
 X, y = make_regression(
    n_samples=100,
    n_features=2,
    noise=0.1,
    random_state=42,
 )
 pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("chase_estimator", ChaseEstimator()),
    ]
 ).fit(X, y)
 pipeline.predict(X)  # Regression task: the output is a 1d array

 # --- Classification: reference pipeline with a built-in classifier --------
 X, y = make_classification(
    n_samples=200,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    random_state=42,
 )
 pipe2 = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("lr", LogisticRegression()),
    ]
 ).fit(X, y)
 pipe2.predict_proba(X)  # Works and outputs a 2d array

 # --- Classification: custom classifier as the last pipeline step ----------
 pipe3 = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("cc", ChaseClassifier()),
    ]
 ).fit(X, y)
 # Works only because ChaseClassifier defines predict_proba; without that
 # method this call breaks.
 pipe3.predict_proba(X)
	import random

	import numpy as np
	from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import StandardScaler
	from sklearn.datasets import make_regression, make_classification
	from sklearn.linear_model import LogisticRegression


	class ChaseEstimator(RegressorMixin, BaseEstimator):
	    """Ordinary least-squares linear regression, reimplemented as an example.

	    The class exists to show a custom estimator that can be used as the
	    final step of a ``Pipeline`` (which can then be the exported
	    ``self.model``). It implements ``predict`` only; which method must
	    exist depends on what the prediction service actually calls.

	    The mixin is listed before ``BaseEstimator`` so the mixin's behaviour
	    takes precedence in the MRO, as scikit-learn recommends. There are no
	    hyperparameters, so no ``__init__`` is defined; the learned attributes
	    ``intercept_`` and ``coef_`` exist only after ``fit`` so that an
	    unfitted instance is not mistaken for a fitted one.
	    """

	    def fit(self, X, y):
	        """Fit intercept and coefficients by least squares.

	        Parameters
	        ----------
	        X : array-like, converted with ``np.asarray``
	            Feature matrix; one row per sample.
	        y : array-like, converted with ``np.asarray``
	            Targets, one entry (or row) per sample.

	        Returns
	        -------
	        self
	        """
	        X = np.asarray(X)
	        y = np.asarray(y)
	        # Prepend a column of ones so the first solved parameter is the bias.
	        X_b = np.c_[np.ones((X.shape[0], 1)), X]
	        # Solve the least-squares problem directly instead of inverting
	        # X_b.T @ X_b: forming the normal equations squares the condition
	        # number and loses precision on correlated features.
	        theta_best, *_ = np.linalg.lstsq(X_b, y, rcond=None)
	        self.intercept_ = theta_best[0]
	        self.coef_ = theta_best[1:]
	        return self

	    def predict(self, X):
	        """Return ``intercept_ + X @ coef_`` for the given samples.

	        Raises
	        ------
	        sklearn.exceptions.NotFittedError
	            If ``fit`` has not been called yet.
	        """
	        # Imported here so this class does not depend on an extra
	        # module-level import being present.
	        from sklearn.utils.validation import check_is_fitted

	        check_is_fitted(self, ["coef_", "intercept_"])
	        X = np.asarray(X)
	        return self.intercept_ + X @ self.coef_


	class ChaseClassifier(ClassifierMixin, BaseEstimator):
	    """Placeholder two-class classifier that returns random probabilities.

	    This is an example of a custom classifier that can be used as the final
	    step of a ``Pipeline`` (which can then be the exported ``self.model``)
	    when the prediction service calls ``predict_proba``. Nothing is learned
	    from the features; the probabilities are drawn from
	    ``random.random()``.

	    The mixin is listed before ``BaseEstimator`` so the mixin's behaviour
	    takes precedence in the MRO, as scikit-learn recommends. There are no
	    hyperparameters, so no ``__init__`` is defined.
	    """

	    def fit(self, X, y):
	        """Record the labels seen in ``y`` and return ``self``.

	        ``X`` is accepted for API compatibility and is not used.
	        """
	        # scikit-learn classifiers expose the known labels as ``classes_``;
	        # setting it here also marks the estimator as fitted.
	        self.classes_ = np.unique(y)
	        return self

	    def predict_proba(self, X):
	        """Return one random ``[p, 1 - p]`` pair per row of ``X``.

	        Parameters
	        ----------
	        X : object with a ``shape`` attribute, or any sized sequence
	            Only the number of rows is used.

	        Returns
	        -------
	        list of list of float
	            One two-element list per sample; each pair sums to 1.
	        """
	        # Fall back to len() so plain Python lists work as well as arrays.
	        n_rows = X.shape[0] if hasattr(X, "shape") else len(X)
	        draws = (random.random() for _ in range(n_rows))
	        return [[p, 1 - p] for p in draws]


	# --- Regression: custom estimator as the last pipeline step ---------------
	X, y = make_regression(
	    n_samples=100,
	    n_features=2,
	    noise=0.1,
	    random_state=42,
	)
	pipeline = Pipeline(
	    steps=[
	        ("scaler", StandardScaler()),
	        ("chase_estimator", ChaseEstimator()),
	    ]
	).fit(X, y)
	pipeline.predict(X)  # Regression task: the output is a 1d array

	# --- Classification: reference pipeline with a built-in classifier --------
	X, y = make_classification(
	    n_samples=200,
	    n_features=4,
	    n_informative=2,
	    n_redundant=0,
	    random_state=42,
	)
	pipe2 = Pipeline(
	    steps=[
	        ("scaler", StandardScaler()),
	        ("lr", LogisticRegression()),
	    ]
	).fit(X, y)
	pipe2.predict_proba(X)  # Works and outputs a 2d array

	# --- Classification: custom classifier as the last pipeline step ----------
	pipe3 = Pipeline(
	    steps=[
	        ("scaler", StandardScaler()),
	        ("cc", ChaseClassifier()),
	    ]
	).fit(X, y)
	# Works only because ChaseClassifier defines predict_proba; without that
	# method this call breaks.
	pipe3.predict_proba(X)