code snippet for Tuning Hyperparameters (part I): SuccessiveHalving
import math

import numpy as np


class SuccessiveHalving(object):
    """Applies successive halving to a model, exploring n configurations with at most r resources.

    Args:
        estimator: object instance with subclass SHBaseEstimator:
            estimator wrapper
        n: integer:
            number of hyperparameter configurations to explore
        r: integer:
            maximum number of resources.
        param_grid: dict:
            Dictionary where the keys are parameters and values are distributions
            from which a parameter is to be sampled. Distributions either have to
            provide a ``rvs`` function to sample from them, or can be given as a
            list of values, where a uniform distribution is assumed.
            Must be of the form:
                {
                    'param_1': distribution_1,
                    ...
                    'param_n': distribution_n
                }
        seed: integer
        ressource_name: str
            Name of the resource parameter,
            e.g. for XGBClassifier this is 'n_estimators'.
        ressource_unit: int
            Minimal step of the resource,
            e.g. for xgboost this could be n_estimators = 10.
    """

    def __init__(self, estimator, n, r, param_grid,
                 ressource_name='n_estimators',
                 ressource_unit=10,
                 scoring=None, n_jobs=1, cv=None, seed=0):
        self.estimator = estimator
        self.n = n
        self.r = r
        self.param_grid = param_grid
        self.ressource_name = ressource_name
        self.ressource_unit = ressource_unit
        self.seed = seed
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.cv = cv
        self.history = list()

    def apply(self, Xtrain, ytrain, Xval, yval):
        """Apply successive halving:
            1. evaluate the performance of all configurations
            2. throw out the worst half
            3. return to 1. until one configuration remains.

        Args:
            Xtrain: array:
                training data
            ytrain: array:
                training target
            Xval: array:
                validation data
            yval: array:
                validation target
        Returns:
            best configuration
        """
        T = self._get_hyperparameter_configurations(self.n)
        first_fit = True
        # Growth factor of the per-round budget:
        # eta = (r / ressource_unit) ** (1 / floor(log2(n))),
        # so the budget climbs geometrically from ressource_unit towards r over
        # the floor(log2(n)) halving rounds. E.g. n=8, ressource_unit=10, r=270
        # gives eta=3 and rounds of 8 configs x 10, 4 x 30 and 2 x 90 resources.
        eta = np.exp(np.log(self.r / float(self.ressource_unit))
                     / math.floor(np.log(len(T)) / np.log(2.)))
        n_iterations = self.ressource_unit
        while len(T) > 1:
            T = self._run_and_score_models(T, ri=int(n_iterations),
                                           Xtrain=Xtrain, ytrain=ytrain,
                                           Xval=Xval, yval=yval,
                                           first_fit=first_fit)
            # keep the better half (rounded up) of the configurations
            T = self._get_top_k(T, k=int(math.ceil(len(T) / 2.)))
            n_iterations *= eta
            if first_fit:
                first_fit = False
        return T
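The three private helpers the class calls (`_get_hyperparameter_configurations`, `_run_and_score_models`, `_get_top_k`) are not shown in the gist. Below is a minimal, hypothetical sketch of what they might look like, written against a plain scikit-learn-compatible estimator rather than the SHBaseEstimator wrapper; the dict-based configuration records, the use of `sklearn.base.clone`, and ignoring `first_fit` (the real wrapper presumably uses it to warm-start later rounds) are all assumptions, not part of the original code.

    # --- Hypothetical sketch: helpers that would live inside SuccessiveHalving ---
    # (the gist does not show these; everything below is an assumption)

    def _get_hyperparameter_configurations(self, n):
        """Sample n configurations from param_grid (sketch)."""
        rng = np.random.RandomState(self.seed)
        configs = []
        for _ in range(n):
            params = {}
            for name, dist in self.param_grid.items():
                if hasattr(dist, 'rvs'):
                    # scipy-style frozen distribution
                    params[name] = dist.rvs(random_state=rng)
                else:
                    # plain list of values: sample uniformly
                    params[name] = dist[rng.randint(len(dist))]
            configs.append({'params': params, 'score': None})
        return configs

    def _run_and_score_models(self, T, ri, Xtrain, ytrain, Xval, yval, first_fit):
        """Fit each configuration with ri resources and score it on the validation set (sketch)."""
        from sklearn.base import clone  # assumed; any fresh copy of the estimator works
        for t in T:
            params = dict(t['params'])
            params[self.ressource_name] = ri
            model = clone(self.estimator).set_params(**params)
            model.fit(Xtrain, ytrain)  # a warm-starting wrapper would reuse earlier fits here
            t['score'] = (self.scoring(model, Xval, yval) if self.scoring is not None
                          else model.score(Xval, yval))
            self.history.append((params, t['score']))
        return T

    def _get_top_k(self, T, k):
        """Keep the k best-scoring configurations (sketch)."""
        return sorted(T, key=lambda t: t['score'], reverse=True)[:k]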
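A hedged usage sketch follows; XGBClassifier, the scipy distributions, and the synthetic data split are illustrative assumptions rather than part of the gist:

from scipy.stats import randint, uniform
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

X, y = make_classification(n_samples=2000, random_state=0)
Xtrain, Xval, ytrain, yval = train_test_split(X, y, random_state=0)

sh = SuccessiveHalving(
    estimator=XGBClassifier(),
    n=8,    # 8 sampled configurations -> floor(log2(8)) = 3 halving rounds
    r=270,  # resource ceiling (here: boosting iterations)
    param_grid={
        'max_depth': randint(2, 8),           # sampled via .rvs()
        'learning_rate': uniform(0.01, 0.3),  # uniform on [0.01, 0.31]
    },
    ressource_name='n_estimators',
    ressource_unit=10,
)
best = sh.apply(Xtrain, ytrain, Xval, yval)
print(best)  # single-element list holding the surviving configuration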