Created February 25, 2021 16:49
-
-
Save ksv-muralidhar/e3a57d09009b480b1ab025f2d4bdbcf8 to your computer and use it in GitHub Desktop.
Custom Transformers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.base import BaseEstimator,TransformerMixin | |
from sklearn.compose import ColumnTransformer | |
from sklearn.preprocessing import FunctionTransformer | |
from sklearn.datasets import load_iris | |
from sklearn.model_selection import GridSearchCV | |
from sklearn.pipeline import Pipeline | |
from sklearn.impute import SimpleImputer | |
from sklearn.linear_model import LogisticRegression | |
# Data import: load the iris dataset once and reuse the result for both the
# feature matrix and the column names, rather than calling load_iris() twice.
iris_bunch = load_iris()
data = pd.DataFrame(iris_bunch['data'], columns=iris_bunch['feature_names'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TransformerMixin enables fit_transform without declaring the method
class OutlierRemover(BaseEstimator, TransformerMixin):
    """Replace IQR outliers with NaN, one column at a time.

    A value counts as an outlier when it lies below ``Q1 - factor * IQR`` or
    above ``Q3 + factor * IQR``. The quartiles are computed from the column
    that is being transformed; nothing is learned during ``fit``.

    Parameters
    ----------
    factor : float, default=1.5
        Multiplier applied to the interquartile range when computing the
        lower and upper bounds.
    """

    def __init__(self, factor=1.5):
        self.factor = factor

    def outlier_removal(self, X, y=None):
        """Return a copy of a single column with its outliers set to NaN.

        Parameters
        ----------
        X : array-like
            One-dimensional numeric values; converted with ``pd.Series``.
        y : ignored
            Present only for signature compatibility.

        Returns
        -------
        pandas.Series
            Same index as the input, with out-of-bounds values replaced by
            NaN. The input object is not modified.
        """
        column = pd.Series(X)
        q1 = column.quantile(0.25)
        q3 = column.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - self.factor * iqr
        upper_bound = q3 + self.factor * iqr
        # mask() builds a new Series and upcasts the dtype when NaN has to be
        # inserted, so no defensive copy is needed and integer columns do not
        # go through an in-place assignment of an incompatible value.
        return column.mask((column < lower_bound) | (column > upper_bound))

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer keeps no fitted state."""
        return self

    def transform(self, X, y=None):
        """Apply ``outlier_removal`` to every column of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame or array-like
            Input data. Anything that is not already a DataFrame (for
            example a NumPy array) is wrapped in one first, because the
            column-wise ``apply`` below is a DataFrame operation.
        y : ignored
            Present only for signature compatibility.

        Returns
        -------
        pandas.DataFrame
            Same shape as the input, with outliers replaced by NaN.
        """
        frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
        return frame.apply(self.outlier_removal)
# Build an OutlierRemover instance using the constructor's default arguments.
outlier_remover = OutlierRemover()
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment