Created
January 1, 2018 22:19
-
-
Save TomLisankie/2eec0fc99f9fdaf3d2b5081f7983bed2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#fit_model code: | |
# TODO: Import 'make_scorer', 'DecisionTreeRegressor', and 'GridSearchCV' | |
from sklearn.tree import DecisionTreeRegressor | |
from sklearn.metrics import make_scorer | |
from sklearn.model_selection import GridSearchCV | |
def fit_model(X, y):
    """Perform a grid search over 'max_depth' for a decision tree regressor.

    A DecisionTreeRegressor is fitted on the input data [X, y] once for
    every 'max_depth' value from 1 to 10, using shuffled cross-validation
    splits, and each candidate is scored with 'performance_metric'.

    Args:
        X: Feature data, handed unchanged to GridSearchCV.fit.
        y: Target values, handed unchanged to GridSearchCV.fit.

    Returns:
        The best estimator found by the search (grid.best_estimator_).

    Note:
        'ShuffleSplit' and 'performance_metric' are not defined in this
        block; they must already be in scope where this function lives.
    """
    # Create cross-validation sets from the training data
    # sklearn version 0.18: ShuffleSplit(n_splits=10, test_size=0.1, train_size=None, random_state=None)
    # sklearn version 0.17: ShuffleSplit(n, n_iter=10, test_size=0.1, train_size=None, random_state=None)
    cv_sets = ShuffleSplit(n_splits=10, test_size=0.20, random_state=0)

    # Decision tree regressor whose depth is tuned by the search
    regressor = DecisionTreeRegressor()

    # Candidate values for 'max_depth': 1 through 10 inclusive
    params = {'max_depth': list(range(1, 11))}

    # Transform 'performance_metric' into a scoring function
    scoring_fnc = make_scorer(performance_metric)

    # Pass 'scoring' and 'cv' by keyword. Positionally, the fourth
    # GridSearchCV argument was 'fit_params' (not 'cv') in sklearn 0.19,
    # so passing cv_sets positionally raised
    # "AttributeError: 'ShuffleSplit' object has no attribute 'items'".
    grid = GridSearchCV(
        estimator=regressor,
        param_grid=params,
        scoring=scoring_fnc,
        cv=cv_sets,
    )

    # Fit the grid search object to the data to compute the optimal model
    grid = grid.fit(X, y)

    # Return the optimal model after fitting the data
    return grid.best_estimator_
# Fit the training data to the model using grid search
reg = fit_model(X_train, y_train)

# Produce the value for 'max_depth'
# (single-argument parenthesized print works under both Python 2 and 3)
print("Parameter 'max_depth' is {} for the optimal model.".format(reg.get_params()['max_depth']))
''' | |
ERROR: | |
/usr/local/lib/python2.7/site-packages/sklearn/model_selection/_search.py:584: DeprecationWarning: "fit_params" as a constructor argument was deprecated in version 0.19 and will be removed in version 0.21. Pass fit parameters to the "fit" method instead. | |
'"fit" method instead.', DeprecationWarning) | |
--------------------------------------------------------------------------- | |
AttributeError Traceback (most recent call last) | |
<ipython-input-28-08a7af49600a> in <module>() | |
1 # Fit the training data to the model using grid search | |
----> 2 reg = fit_model(X_train, y_train) | |
3 | |
4 # Produce the value for 'max_depth' | |
5 print "Parameter 'max_depth' is {} for the optimal model.".format(reg.get_params()['max_depth']) | |
<ipython-input-27-f20bc5181fdf> in fit_model(X, y) | |
28 | |
29 # Fit the grid search object to the data to compute the optimal model | |
---> 30 grid = grid.fit(X, y) | |
31 | |
32 # Return the optimal model after fitting the data | |
/usr/local/lib/python2.7/site-packages/sklearn/model_selection/_search.pyc in fit(self, X, y, groups, **fit_params) | |
637 error_score=self.error_score) | |
638 for parameters, (train, test) in product(candidate_params, | |
--> 639 cv.split(X, y, groups))) | |
640 | |
641 # if one choose to see train score, "out" will contain train score info | |
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self, iterable) | |
777 # was dispatched. In particular this covers the edge | |
778 # case of Parallel used with an exhausted iterator. | |
--> 779 while self.dispatch_one_batch(iterator): | |
780 self._iterating = True | |
781 else: | |
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in dispatch_one_batch(self, iterator) | |
623 return False | |
624 else: | |
--> 625 self._dispatch(tasks) | |
626 return True | |
627 | |
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in _dispatch(self, batch) | |
586 dispatch_timestamp = time.time() | |
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self) | |
--> 588 job = self._backend.apply_async(batch, callback=cb) | |
589 self._jobs.append(job) | |
590 | |
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.pyc in apply_async(self, func, callback) | |
109 def apply_async(self, func, callback=None): | |
110 """Schedule a func to be run""" | |
--> 111 result = ImmediateResult(func) | |
112 if callback: | |
113 callback(result) | |
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.pyc in __init__(self, batch) | |
330 # Don't delay the application, to avoid keeping the input | |
331 # arguments in memory | |
--> 332 self.results = batch() | |
333 | |
334 def get(self): | |
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self) | |
129 | |
130 def __call__(self): | |
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] | |
132 | |
133 def __len__(self): | |
/usr/local/lib/python2.7/site-packages/sklearn/model_selection/_validation.pyc in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score) | |
437 fit_params = fit_params if fit_params is not None else {} | |
438 fit_params = dict([(k, _index_param_value(X, v, train)) | |
--> 439 for k, v in fit_params.items()]) | |
440 | |
441 test_scores = {} | |
AttributeError: 'ShuffleSplit' object has no attribute 'items' | |
''' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment