TomLisankie · January 1, 2018 22:19
diff --git a/boston.py b/boston.py
 #fit_model code:

 # TODO: Import 'make_scorer', 'DecisionTreeRegressor', and 'GridSearchCV'
 from sklearn.tree import DecisionTreeRegressor
 from sklearn.metrics import make_scorer
 from sklearn.model_selection import GridSearchCV

 def fit_model(X, y):
     """Grid-search 'max_depth' for a decision tree regressor fit on [X, y].

     Depths 1 through 10 are each evaluated with 'performance_metric'
     (wrapped by 'make_scorer') over 10 shuffled splits that each hold out
     20% of the data.

     Args:
         X: Input features, passed unchanged to GridSearchCV.fit.
         y: Target values, passed unchanged to GridSearchCV.fit.

     Returns:
         The 'best_estimator_' attribute of the fitted grid search.
     """
     # Create cross-validation sets from the training data.
     # sklearn version 0.18: ShuffleSplit(n_splits=10, test_size=0.1, train_size=None, random_state=None)
     # sklearn version 0.17: ShuffleSplit(n, n_iter=10, test_size=0.1, train_size=None, random_state=None)
     # NOTE(review): 'ShuffleSplit' and 'performance_metric' are not defined in
     # this snippet; they are expected to already be in scope. The 'n_splits'
     # keyword implies the 0.18+ sklearn.model_selection.ShuffleSplit -- confirm.
     cv_sets = ShuffleSplit(n_splits=10, test_size=0.20, random_state=0)

     # Decision tree regressor whose depth is being tuned
     regressor = DecisionTreeRegressor()

     # Candidate values for 'max_depth': 1 through 10 inclusive
     params = {'max_depth': list(range(1, 11))}

     # Turn 'performance_metric' into a scorer that GridSearchCV can call
     scoring_fnc = make_scorer(performance_metric)

     # Pass every argument by keyword. Positionally, the fourth parameter of
     # GridSearchCV in sklearn 0.19 is 'fit_params', not 'cv', so handing it
     # 'cv_sets' by position raises
     # "AttributeError: 'ShuffleSplit' object has no attribute 'items'".
     grid = GridSearchCV(estimator=regressor,
                         param_grid=params,
                         scoring=scoring_fnc,
                         cv=cv_sets)

     # Fit the grid search object to the data to compute the optimal model
     grid = grid.fit(X, y)

     # Return the optimal model after fitting the data
     return grid.best_estimator_
  
 # Fit the training data to the model using grid search
 reg = fit_model(X_train, y_train)

 # Produce the value for 'max_depth'.
 # The single-argument parenthesized form prints the same text under the
 # Python 2 print statement and the Python 3 print function.
 print("Parameter 'max_depth' is {} for the optimal model.".format(
     reg.get_params()['max_depth']))




 '''
 ERROR:

 /usr/local/lib/python2.7/site-packages/sklearn/model_selection/_search.py:584: DeprecationWarning: "fit_params" as a constructor argument was deprecated in version 0.19 and will be removed in version 0.21. Pass fit parameters to the "fit" method instead.
  '"fit" method instead.', DeprecationWarning)

 ---------------------------------------------------------------------------
 AttributeError                            Traceback (most recent call last)
 <ipython-input-28-08a7af49600a> in <module>()
      1 # Fit the training data to the model using grid search
 ----> 2 reg = fit_model(X_train, y_train)
      3 
      4 # Produce the value for 'max_depth'
      5 print "Parameter 'max_depth' is {} for the optimal model.".format(reg.get_params()['max_depth'])

 <ipython-input-27-f20bc5181fdf> in fit_model(X, y)
     28 
     29     # Fit the grid search object to the data to compute the optimal model
 ---> 30     grid = grid.fit(X, y)
     31 
     32     # Return the optimal model after fitting the data

 /usr/local/lib/python2.7/site-packages/sklearn/model_selection/_search.pyc in fit(self, X, y, groups, **fit_params)
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
 --> 639                                                    cv.split(X, y, groups)))
    640 
    641         # if one choose to see train score, "out" will contain train score info

 /usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self, iterable)
    777             # was dispatched. In particular this covers the edge
    778             # case of Parallel used with an exhausted iterator.
 --> 779             while self.dispatch_one_batch(iterator):
    780                 self._iterating = True
    781             else:

 /usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in dispatch_one_batch(self, iterator)
    623                 return False
    624             else:
 --> 625                 self._dispatch(tasks)
    626                 return True
    627 

 /usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in _dispatch(self, batch)
    586         dispatch_timestamp = time.time()
    587         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
 --> 588         job = self._backend.apply_async(batch, callback=cb)
    589         self._jobs.append(job)
    590 

 /usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.pyc in apply_async(self, func, callback)
    109     def apply_async(self, func, callback=None):
    110         """Schedule a func to be run"""
 --> 111         result = ImmediateResult(func)
    112         if callback:
    113             callback(result)

 /usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.pyc in __init__(self, batch)
    330         # Don't delay the application, to avoid keeping the input
    331         # arguments in memory
 --> 332         self.results = batch()
    333 
    334     def get(self):

 /usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self)
    129 
    130     def __call__(self):
 --> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

 /usr/local/lib/python2.7/site-packages/sklearn/model_selection/_validation.pyc in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
    437     fit_params = fit_params if fit_params is not None else {}
    438     fit_params = dict([(k, _index_param_value(X, v, train))
 --> 439                       for k, v in fit_params.items()])
    440 
    441     test_scores = {}

 AttributeError: 'ShuffleSplit' object has no attribute 'items'



 '''
	#fit_model code:

	# TODO: Import 'make_scorer', 'DecisionTreeRegressor', and 'GridSearchCV'
	from sklearn.tree import DecisionTreeRegressor
	from sklearn.metrics import make_scorer
	from sklearn.model_selection import GridSearchCV

	def fit_model(X, y):
	    """Grid-search 'max_depth' for a decision tree regressor fit on [X, y].

	    Depths 1 through 10 are each evaluated with 'performance_metric'
	    (wrapped by 'make_scorer') over 10 shuffled splits that each hold out
	    20% of the data.

	    Args:
	        X: Input features, passed unchanged to GridSearchCV.fit.
	        y: Target values, passed unchanged to GridSearchCV.fit.

	    Returns:
	        The 'best_estimator_' attribute of the fitted grid search.
	    """
	    # Create cross-validation sets from the training data.
	    # sklearn version 0.18: ShuffleSplit(n_splits=10, test_size=0.1, train_size=None, random_state=None)
	    # sklearn version 0.17: ShuffleSplit(n, n_iter=10, test_size=0.1, train_size=None, random_state=None)
	    # NOTE(review): 'ShuffleSplit' and 'performance_metric' are not defined in
	    # this snippet; they are expected to already be in scope. The 'n_splits'
	    # keyword implies the 0.18+ sklearn.model_selection.ShuffleSplit -- confirm.
	    cv_sets = ShuffleSplit(n_splits=10, test_size=0.20, random_state=0)

	    # Decision tree regressor whose depth is being tuned
	    regressor = DecisionTreeRegressor()

	    # Candidate values for 'max_depth': 1 through 10 inclusive
	    params = {'max_depth': list(range(1, 11))}

	    # Turn 'performance_metric' into a scorer that GridSearchCV can call
	    scoring_fnc = make_scorer(performance_metric)

	    # Pass every argument by keyword. Positionally, the fourth parameter of
	    # GridSearchCV in sklearn 0.19 is 'fit_params', not 'cv', so handing it
	    # 'cv_sets' by position raises
	    # "AttributeError: 'ShuffleSplit' object has no attribute 'items'".
	    grid = GridSearchCV(estimator=regressor,
	                        param_grid=params,
	                        scoring=scoring_fnc,
	                        cv=cv_sets)

	    # Fit the grid search object to the data to compute the optimal model
	    grid = grid.fit(X, y)

	    # Return the optimal model after fitting the data
	    return grid.best_estimator_

	# Fit the training data to the model using grid search
	reg = fit_model(X_train, y_train)

	# Produce the value for 'max_depth'.
	# The single-argument parenthesized form prints the same text under the
	# Python 2 print statement and the Python 3 print function.
	print("Parameter 'max_depth' is {} for the optimal model.".format(
	    reg.get_params()['max_depth']))




	'''
	ERROR:

	/usr/local/lib/python2.7/site-packages/sklearn/model_selection/_search.py:584: DeprecationWarning: "fit_params" as a constructor argument was deprecated in version 0.19 and will be removed in version 0.21. Pass fit parameters to the "fit" method instead.
	'"fit" method instead.', DeprecationWarning)

	---------------------------------------------------------------------------
	AttributeError Traceback (most recent call last)
	<ipython-input-28-08a7af49600a> in <module>()
	1 # Fit the training data to the model using grid search
	----> 2 reg = fit_model(X_train, y_train)
	3
	4 # Produce the value for 'max_depth'
	5 print "Parameter 'max_depth' is {} for the optimal model.".format(reg.get_params()['max_depth'])

	<ipython-input-27-f20bc5181fdf> in fit_model(X, y)
	28
	29 # Fit the grid search object to the data to compute the optimal model
	---> 30 grid = grid.fit(X, y)
	31
	32 # Return the optimal model after fitting the data

	/usr/local/lib/python2.7/site-packages/sklearn/model_selection/_search.pyc in fit(self, X, y, groups, **fit_params)
	637 error_score=self.error_score)
	638 for parameters, (train, test) in product(candidate_params,
	--> 639 cv.split(X, y, groups)))
	640
	641 # if one choose to see train score, "out" will contain train score info

	/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self, iterable)
	777 # was dispatched. In particular this covers the edge
	778 # case of Parallel used with an exhausted iterator.
	--> 779 while self.dispatch_one_batch(iterator):
	780 self._iterating = True
	781 else:

	/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in dispatch_one_batch(self, iterator)
	623 return False
	624 else:
	--> 625 self._dispatch(tasks)
	626 return True
	627

	/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in _dispatch(self, batch)
	586 dispatch_timestamp = time.time()
	587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
	--> 588 job = self._backend.apply_async(batch, callback=cb)
	589 self._jobs.append(job)
	590

	/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.pyc in apply_async(self, func, callback)
	109 def apply_async(self, func, callback=None):
	110 """Schedule a func to be run"""
	--> 111 result = ImmediateResult(func)
	112 if callback:
	113 callback(result)

	/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.pyc in __init__(self, batch)
	330 # Don't delay the application, to avoid keeping the input
	331 # arguments in memory
	--> 332 self.results = batch()
	333
	334 def get(self):

	/usr/local/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self)
	129
	130 def __call__(self):
	--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
	132
	133 def __len__(self):

	/usr/local/lib/python2.7/site-packages/sklearn/model_selection/_validation.pyc in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
	437 fit_params = fit_params if fit_params is not None else {}
	438 fit_params = dict([(k, _index_param_value(X, v, train))
	--> 439 for k, v in fit_params.items()])
	440
	441 test_scores = {}

	AttributeError: 'ShuffleSplit' object has no attribute 'items'



	'''