Skip to content

Instantly share code, notes, and snippets.

@StrikingLoo
Created April 28, 2021 22:18
Show Gist options
  • Save StrikingLoo/cbf493371958b20479d12d6709453be6 to your computer and use it in GitHub Desktop.
Save StrikingLoo/cbf493371958b20479d12d6709453be6 to your computer and use it in GitHub Desktop.
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
# Split the feature columns into numeric and categorical groups.
# NOTE(review): `housing` is not defined in this snippet; it is used here like
# a pandas DataFrame -- confirm against the code that loads it.
cat_attribs = ["ocean_proximity"]

# Everything except the categorical column is treated as numeric.
housing_num = housing.drop(columns=["ocean_proximity"])
num_attribs = housing_num.columns.tolist()
print(num_attribs)

# The target must not be fed to the pipeline as an input feature.
num_attribs.remove("median_house_value")
print(num_attribs)

# Positional indexes of the columns that CombinedAttributesAdder reads from the
# array it receives.
# NOTE(review): these positions presumably match the order of `num_attribs`
# -- verify against the printed list above.
rooms_ix = 3
bedrooms_ix = 4
population_ix = 5
households_ix = 6
class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    """Append ratio features computed from existing columns of a 2-D array.

    The source columns are located through the module-level positional
    indexes ``rooms_ix``, ``bedrooms_ix``, ``population_ix`` and
    ``households_ix``. The new columns are appended to the right of the
    input in this order: rooms per household, population per household and,
    optionally, bedrooms per room.
    """

    def __init__(self, add_bedrooms_per_room=True):  # no *args or **kargs
        """Store the configuration flag.

        Args:
            add_bedrooms_per_room: When true, ``transform`` also appends the
                bedrooms-per-room ratio as a third extra column.
        """
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """Return ``self`` unchanged; this transformer learns nothing."""
        return self  # nothing else to do

    def transform(self, X):
        """Return ``X`` with the ratio columns appended.

        Args:
            X: Array supporting ``X[:, i]`` column indexing (NumPy-style).

        Returns:
            The column-wise concatenation of ``X`` and the derived ratios,
            built with ``np.c_``.
        """
        rooms_per_household = X[:, rooms_ix] / X[:, households_ix]
        population_per_household = X[:, population_ix] / X[:, households_ix]
        if self.add_bedrooms_per_room:
            bedrooms_per_room = X[:, bedrooms_ix] / X[:, rooms_ix]
            return np.c_[
                X,
                rooms_per_household,
                population_per_household,
                bedrooms_per_room,
            ]
        return np.c_[X, rooms_per_household, population_per_household]
# Numeric branch: fill missing values with the column median, append the
# derived ratio columns, then standardize.
num_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("attribs_adder", CombinedAttributesAdder()),
        ("std_scaler", StandardScaler()),
    ]
)

# Route the numeric columns through the numeric branch and one-hot encode the
# categorical column.
full_pipeline = ColumnTransformer(
    transformers=[
        ("num", num_pipeline, num_attribs),
        ("cat", OneHotEncoder(), cat_attribs),
    ]
)

# NOTE(review): the pipeline is fitted on the unfiltered `housing`, so the
# rows of `housing_prepared` correspond to `housing_labels`, not to
# `housing_labels_filtered`.
housing_prepared = full_pipeline.fit_transform(housing)

# Targets for every row.
housing_labels = housing["median_house_value"]

# Rows (and their targets) whose target value is strictly below 500000.
housing_filtered = housing.loc[housing["median_house_value"] < 500000]
housing_labels_filtered = housing.loc[
    housing["median_house_value"] < 500000, "median_house_value"
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment