Skip to content

Instantly share code, notes, and snippets.

@FelixChop
Created April 12, 2020 09:56
Show Gist options
  • Save FelixChop/18cac81b4c63a341c74226ad30b088b2 to your computer and use it in GitHub Desktop.
Save FelixChop/18cac81b4c63a341c74226ad30b088b2 to your computer and use it in GitHub Desktop.
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn_pandas import DataFrameMapper
from category_encoders import LeaveOneOutEncoder
# --- Missing-value imputers -------------------------------------------------
# Every imputer except the Embarked one is created with add_indicator=True,
# i.e. it is asked to also emit a missing-value indicator next to the
# imputed values.

# Numeric columns filled with the median.
imputer_Age = SimpleImputer(
    strategy='median',
    add_indicator=True,
)
imputer_Fare = SimpleImputer(
    strategy='median',
    add_indicator=True,
)

# Columns filled with their most frequent value.
imputer_Pclass = SimpleImputer(
    strategy='most_frequent',
    add_indicator=True,
)
imputer_Embarked = SimpleImputer(strategy='most_frequent')

# Count-like columns: a missing value is replaced by the constant 0.
imputer_SibSp = SimpleImputer(
    strategy='constant',
    fill_value=0,
    add_indicator=True,
)
imputer_Parch = SimpleImputer(
    strategy='constant',
    fill_value=0,
    add_indicator=True,
)

# --- Scalers ----------------------------------------------------------------
scaler_Age = MinMaxScaler()
scaler_Fare = StandardScaler()

# --- Categorical encoders ---------------------------------------------------
# Both one-hot encoders raise on categories that were not seen during fit
# (handle_unknown='error').
# The 'male' category is dropped for Sex, leaving the remaining level(s).
onehotencoder_Sex = OneHotEncoder(
    drop=['male'],
    handle_unknown='error',
)
onehotencoder_Embarked = OneHotEncoder(handle_unknown='error')

# Target-based encoder; random_state is fixed so the sigma-controlled noise
# is reproducible.
leaveoneout_encoder = LeaveOneOutEncoder(
    sigma=.1,
    random_state=2020,
)
# Declare, column by column, which transformers are applied and (optionally)
# under which alias the result is exposed. Each entry has the form
# (input columns, list of transformers run in sequence[, options]).
mapper = DataFrameMapper(
    [
        # Numeric features: impute, then scale.
        (
            ['Age'],
            [imputer_Age, scaler_Age],
            {'alias': 'Age_scaled'},
        ),
        # Imputation only.
        (['Pclass'], [imputer_Pclass]),
        (['SibSp'], [imputer_SibSp]),
        (['Parch'], [imputer_Parch]),
        (
            ['Fare'],
            [imputer_Fare, scaler_Fare],
            {'alias': 'Fare_scaled'},
        ),
        # Categorical features: one-hot encoding ('male' is dropped for Sex).
        (
            ['Sex'],
            [onehotencoder_Sex],
            {'alias': 'is_female'},
        ),
        (
            ['Embarked'],
            [imputer_Embarked, onehotencoder_Embarked],
        ),
        # NOTE(review): assumes the input frame already has an
        # 'Embarked_Pclass_Sex' column built beforehand -- confirm upstream.
        (
            ['Embarked_Pclass_Sex'],
            [leaveoneout_encoder],
        ),
    ],
    df_out=True,  # return a pandas DataFrame rather than a bare array
)

# Fitted like a sklearn ColumnTransformer; the target is passed along so that
# the supervised leave-one-out encoder can use it.
mapper.fit(train, y=train['Survived'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment