Created
April 28, 2021 22:18
-
-
Save StrikingLoo/cbf493371958b20479d12d6709453be6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.compose import ColumnTransformer | |
from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
from sklearn.impute import SimpleImputer | |
from sklearn.base import BaseEstimator, TransformerMixin | |
# Numeric view of the data: every column except the categorical one.
housing_num = housing.drop(columns=["ocean_proximity"])

# Names of the numeric columns, printed before and after the target column
# is removed so the change is visible in the output.
num_attribs = list(housing_num.columns)
print(num_attribs)
num_attribs.remove('median_house_value')
print(num_attribs)

# The single categorical column, handled separately from the numeric ones.
cat_attribs = ["ocean_proximity"]

# Positional indices used by CombinedAttributesAdder to pick columns out of
# the array it receives.
# NOTE(review): these presumably match the order of `num_attribs` above --
# confirm against the actual column order of `housing`.
rooms_ix = 3
bedrooms_ix = 4
population_ix = 5
households_ix = 6
class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    """Append ratio columns computed from existing columns of ``X``.

    Two ratios are always appended (rooms per household and population per
    household); a third (bedrooms per room) is appended when
    ``add_bedrooms_per_room`` is true.  The source columns are selected with
    the module-level indices ``rooms_ix``, ``bedrooms_ix``, ``population_ix``
    and ``households_ix``.
    """

    def __init__(self, add_bedrooms_per_room=True):  # no *args or **kargs
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is learned from ``X``."""
        return self

    def transform(self, X):
        """Return ``X`` with the derived ratio columns added on the right.

        ``X`` is indexed as ``X[:, i]``.
        NOTE(review): this assumes a 2-D NumPy-style array (not a
        DataFrame) -- confirm against the caller.
        """
        households = X[:, households_ix]
        rooms = X[:, rooms_ix]

        # Column order matters to downstream consumers: rooms/household,
        # population/household, then (optionally) bedrooms/room.
        derived = [rooms / households, X[:, population_ix] / households]
        if self.add_bedrooms_per_room:
            derived.append(X[:, bedrooms_ix] / rooms)

        # Subscripting np.c_ with a tuple is the same as np.c_[X, a, b, ...].
        return np.c_[(X, *derived)]
# Numeric preprocessing, applied in order: fill missing values with the
# column median, append the derived ratio columns, then standardize.
num_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("attribs_adder", CombinedAttributesAdder()),
        ("std_scaler", StandardScaler()),
    ]
)
# Route the numeric columns through `num_pipeline` and one-hot encode the
# categorical columns.
full_pipeline = ColumnTransformer(
    transformers=[
        ("num", num_pipeline, num_attribs),
        ("cat", OneHotEncoder(), cat_attribs),
    ]
)
# Fit the preprocessing on the full frame and transform it in one pass.
housing_prepared = full_pipeline.fit_transform(housing)

# Target column for every row.
housing_labels = housing["median_house_value"]

# Rows (and their targets) whose target is strictly below 500000.
# NOTE(review): 500000 looks like a cap on the target values -- confirm.
# `housing_prepared` above is built from the unfiltered frame, not from
# `housing_filtered`.
housing_filtered = housing.loc[housing_labels < 500000]
housing_labels_filtered = housing.loc[housing_labels < 500000, "median_house_value"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment