Skip to content

Instantly share code, notes, and snippets.

@TomLisankie
Created January 1, 2018 22:19
Show Gist options
  • Save TomLisankie/2eec0fc99f9fdaf3d2b5081f7983bed2 to your computer and use it in GitHub Desktop.
Save TomLisankie/2eec0fc99f9fdaf3d2b5081f7983bed2 to your computer and use it in GitHub Desktop.
#fit_model code:
# TODO: Import 'make_scorer', 'DecisionTreeRegressor', and 'GridSearchCV'
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
def fit_model(X, y):
""" Performs grid search over the 'max_depth' parameter for a
decision tree regressor trained on the input data [X, y]. """
# Create cross-validation sets from the training data
# sklearn version 0.18: ShuffleSplit(n_splits=10, test_size=0.1, train_size=None, random_state=None)
# sklearn versiin 0.17: ShuffleSplit(n, n_iter=10, test_size=0.1, train_size=None, random_state=None)
cv_sets = ShuffleSplit(n_splits=10, test_size = 0.20, random_state = 0)
# TODO: Create a decision tree regressor object
regressor = DecisionTreeRegressor()
# TODO: Create a dictionary for the parameter 'max_depth' with a range from 1 to 10
params = {'max_depth':[1,2,3,4,5,6,7,8,9,10]}
# TODO: Transform 'performance_metric' into a scoring function using 'make_scorer'
scoring_fnc = make_scorer(performance_metric)
# TODO: Create the grid search cv object --> GridSearchCV()
# Make sure to include the right parameters in the object:
# (estimator, param_grid, scoring, cv) which have values 'regressor', 'params', 'scoring_fnc', and 'cv_sets' respectively.
grid = GridSearchCV(regressor, params, scoring_fnc, cv_sets)
# Fit the grid search object to the data to compute the optimal model
grid = grid.fit(X, y)
# Return the optimal model after fitting the data
return grid.best_estimator_
# Fit the training data to the model using grid search
reg = fit_model(X_train, y_train)
# Produce the value for 'max_depth'
print "Parameter 'max_depth' is {} for the optimal model.".format(reg.get_params()['max_depth'])
'''
ERROR:
/usr/local/lib/python2.7/site-packages/sklearn/model_selection/_search.py:584: DeprecationWarning: "fit_params" as a constructor argument was deprecated in version 0.19 and will be removed in version 0.21. Pass fit parameters to the "fit" method instead.
'"fit" method instead.', DeprecationWarning)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-28-08a7af49600a> in <module>()
1 # Fit the training data to the model using grid search
----> 2 reg = fit_model(X_train, y_train)
3
4 # Produce the value for 'max_depth'
5 print "Parameter 'max_depth' is {} for the optimal model.".format(reg.get_params()['max_depth'])
<ipython-input-27-f20bc5181fdf> in fit_model(X, y)
28
29 # Fit the grid search object to the data to compute the optimal model
---> 30 grid = grid.fit(X, y)
31
32 # Return the optimal model after fitting the data
/usr/local/lib/python2.7/site-packages/sklearn/model_selection/_search.pyc in fit(self, X, y, groups, **fit_params)
637 error_score=self.error_score)
638 for parameters, (train, test) in product(candidate_params,
--> 639 cv.split(X, y, groups)))
640
641 # if one choose to see train score, "out" will contain train score info
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self, iterable)
777 # was dispatched. In particular this covers the edge
778 # case of Parallel used with an exhausted iterator.
--> 779 while self.dispatch_one_batch(iterator):
780 self._iterating = True
781 else:
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in dispatch_one_batch(self, iterator)
623 return False
624 else:
--> 625 self._dispatch(tasks)
626 return True
627
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in _dispatch(self, batch)
586 dispatch_timestamp = time.time()
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588 job = self._backend.apply_async(batch, callback=cb)
589 self._jobs.append(job)
590
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.pyc in apply_async(self, func, callback)
109 def apply_async(self, func, callback=None):
110 """Schedule a func to be run"""
--> 111 result = ImmediateResult(func)
112 if callback:
113 callback(result)
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.pyc in __init__(self, batch)
330 # Don't delay the application, to avoid keeping the input
331 # arguments in memory
--> 332 self.results = batch()
333
334 def get(self):
/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
/usr/local/lib/python2.7/site-packages/sklearn/model_selection/_validation.pyc in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
437 fit_params = fit_params if fit_params is not None else {}
438 fit_params = dict([(k, _index_param_value(X, v, train))
--> 439 for k, v in fit_params.items()])
440
441 test_scores = {}
AttributeError: 'ShuffleSplit' object has no attribute 'items'
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment