code snippet for Tuning Hyperparameters (part I): SuccessiveHalving
import math

import numpy as np


class SuccessiveHalving(object):
    """Applies successive halving to a model, exploring n configurations with at most r resources.

    Args:
        estimator: object instance with subclass SHBaseEstimator:
            estimator wrapper
        n: integer:
            number of hyperparameter configurations to explore
        r: integer:
            maximum number of resources.
        param_grid: dict:
            Dictionary where the keys are parameters and values are distributions
            from which a parameter is to be sampled. Distributions either have to
            provide a ``rvs`` function to sample from them, or can be given as a
            list of values, where a uniform distribution is assumed.
            Must be of the form:
                {
                    'param_1': distribution_1,
                    ...
                    'param_n': distribution_n
                }
        seed: integer
        ressource_name: str
            Name of the resource parameter,
            e.g. for XGBClassifier this is 'n_estimators'.
        ressource_unit: int
            Minimal step of the resource,
            e.g. for xgboost this could be n_estimators = 10.
    """

    def __init__(self, estimator, n, r, param_grid,
                 ressource_name='n_estimators',
                 ressource_unit=10,
                 scoring=None, n_jobs=1, cv=None, seed=0):
        self.estimator = estimator
        self.n = n
        self.r = r
        self.param_grid = param_grid
        self.ressource_name = ressource_name
        self.ressource_unit = ressource_unit
        self.seed = seed
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.cv = cv
        self.history = list()

    def apply(self, Xtrain, ytrain, Xval, yval):
        """Apply successive halving:
            1. evaluate the performance of all configurations
            2. throw out the worst half
            3. return to 1. until one configuration remains.

        Args:
            Xtrain: array:
                training data
            ytrain: array:
                training target
            Xval: array:
                validation data
            yval: array:
                validation target
        Returns:
            best configuration
        """
        T = self._get_hyperparameter_configurations(self.n)
        first_fit = True
        # Growth factor of the per-round budget:
        # eta = (r / ressource_unit) ** (1 / floor(log2(n))),
        # so the budget climbs geometrically from ressource_unit towards r over
        # the floor(log2(n)) halving rounds. E.g. n=8, ressource_unit=10, r=270
        # gives eta=3 and rounds of 8 configs x 10, 4 x 30 and 2 x 90 resources.
        eta = np.exp(np.log(self.r / float(self.ressource_unit))
                     / math.floor(np.log(len(T)) / np.log(2.)))
        n_iterations = self.ressource_unit
        while len(T) > 1:
            T = self._run_and_score_models(T, ri=int(n_iterations),
                                           Xtrain=Xtrain, ytrain=ytrain,
                                           Xval=Xval, yval=yval,
                                           first_fit=first_fit)
            # keep the better half (rounded up) of the configurations
            T = self._get_top_k(T, k=int(math.ceil(len(T) / 2.)))
            n_iterations *= eta
            if first_fit:
                first_fit = False
        return T
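The three private helpers the class calls (`_get_hyperparameter_configurations`, `_run_and_score_models`, `_get_top_k`) are not shown in the gist. Below is a minimal, hypothetical sketch of what they might look like, written against a plain scikit-learn-compatible estimator rather than the SHBaseEstimator wrapper; the dict-based configuration records, the use of `sklearn.base.clone`, and ignoring `first_fit` (the real wrapper presumably uses it to warm-start later rounds) are all assumptions, not part of the original code.

    # --- Hypothetical sketch: helpers that would live inside SuccessiveHalving ---
    # (the gist does not show these; everything below is an assumption)

    def _get_hyperparameter_configurations(self, n):
        """Sample n configurations from param_grid (sketch)."""
        rng = np.random.RandomState(self.seed)
        configs = []
        for _ in range(n):
            params = {}
            for name, dist in self.param_grid.items():
                if hasattr(dist, 'rvs'):
                    # scipy-style frozen distribution
                    params[name] = dist.rvs(random_state=rng)
                else:
                    # plain list of values: sample uniformly
                    params[name] = dist[rng.randint(len(dist))]
            configs.append({'params': params, 'score': None})
        return configs

    def _run_and_score_models(self, T, ri, Xtrain, ytrain, Xval, yval, first_fit):
        """Fit each configuration with ri resources and score it on the validation set (sketch)."""
        from sklearn.base import clone  # assumed; any fresh copy of the estimator works
        for t in T:
            params = dict(t['params'])
            params[self.ressource_name] = ri
            model = clone(self.estimator).set_params(**params)
            model.fit(Xtrain, ytrain)  # a warm-starting wrapper would reuse earlier fits here
            t['score'] = (self.scoring(model, Xval, yval) if self.scoring is not None
                          else model.score(Xval, yval))
            self.history.append((params, t['score']))
        return T

    def _get_top_k(self, T, k):
        """Keep the k best-scoring configurations (sketch)."""
        return sorted(T, key=lambda t: t['score'], reverse=True)[:k]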
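A hedged usage sketch follows; XGBClassifier, the scipy distributions, and the synthetic data split are illustrative assumptions rather than part of the gist:

from scipy.stats import randint, uniform
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

X, y = make_classification(n_samples=2000, random_state=0)
Xtrain, Xval, ytrain, yval = train_test_split(X, y, random_state=0)

sh = SuccessiveHalving(
    estimator=XGBClassifier(),
    n=8,    # 8 sampled configurations -> floor(log2(8)) = 3 halving rounds
    r=270,  # resource ceiling (here: boosting iterations)
    param_grid={
        'max_depth': randint(2, 8),           # sampled via .rvs()
        'learning_rate': uniform(0.01, 0.3),  # uniform on [0.01, 0.31]
    },
    ressource_name='n_estimators',
    ressource_unit=10,
)
best = sh.apply(Xtrain, ytrain, Xval, yval)
print(best)  # single-element list holding the surviving configuration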