Created
July 12, 2016 08:53
-
-
Save mtzl/b220f7bd9e2433fc0701f3df3ac16988 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
============================================================== | |
Grid Search over multiple Pipeline Configurations & Estimators | |
============================================================== | |
This example constructs a pipeline that does an optional prescaling step, | |
then dimensionality reduction followed by prediction with a decision tree | |
ensemble. It demonstrates the use of GridSearchCV and Pipeline to optimize | |
over different classes of estimators in a single CV run -- Gradient Boosted | |
Decision Trees are compared to Random Forests during the grid search. | |
""" | |
# Authors: Robert McGibbon, Joel Nothman, Moritz Lotze | |
from __future__ import print_function, division | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn.datasets import load_iris | |
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier | |
from sklearn.metrics import accuracy_score | |
from sklearn.model_selection import GridSearchCV, cross_val_predict | |
from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import RobustScaler | |
from sklearn.decomposition import PCA | |
from sklearn.feature_selection import SelectKBest, f_classif | |
# Echo the module docstring so the example's description is part of its output.
print(__doc__)

# Baseline three-step pipeline. The step names ('prescale', 'reduce_dim',
# 'classify') are the keys that the parameter grid uses to replace or
# reconfigure each stage during the search.
pipe = Pipeline(steps=[
    ('prescale', RobustScaler()),
    ('reduce_dim', PCA()),
    ('classify', RandomForestClassifier()),
])
# Candidate output sizes for the dimensionality-reduction step; used both as
# PCA's n_components and as SelectKBest's k.
N_FEATURES_OPTIONS = [2, 4]

# Two independent sub-grids, one per family of dimensionality reducer.
param_grid = [
    # PCA-based reduction. 'prescale' is searched over RobustScaler() and
    # None (the "optional prescaling" mentioned in the module docstring).
    dict(
        prescale=[RobustScaler(), None],
        reduce_dim=[PCA(iterated_power=7)],
        reduce_dim__n_components=N_FEATURES_OPTIONS,
        classify=[RandomForestClassifier(), GradientBoostingClassifier()],
    ),
    # Univariate feature selection. 'prescale' is not listed here, so this
    # sub-grid does not vary that step.
    dict(
        reduce_dim=[SelectKBest(f_classif)],
        reduce_dim__k=N_FEATURES_OPTIONS,
        classify=[RandomForestClassifier(), GradientBoostingClassifier()],
    ),
]
# Search both sub-grids with 3-fold cross-validation, using 8 parallel jobs.
grid = GridSearchCV(pipe, param_grid=param_grid, cv=3, n_jobs=8)

iris = load_iris()

# Run the search once on the full dataset.
grid.fit(iris.data, iris.target)

# The search object itself is the estimator handed to cross_val_predict, so
# the hyper-parameter selection is part of what gets cross-validated.
preds = cross_val_predict(grid, iris.data, iris.target)

# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
# Plain accuracy is symmetric, so the printed value is unchanged, but the
# documented order keeps the call correct if the metric is ever swapped.
accs = accuracy_score(iris.target, preds)
print(accs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment