I hereby claim:
- I am benoitdescamps on github.
- I am bendesc (https://keybase.io/bendesc) on keybase.
- I have a public key ASCc_8P9Faka2-t-le6Gl-HtTVsKBjTMR_207ktVS57SOwo
To claim this, I am signing this object:
class SHBaseEstimator(ABC):
    """Thin wrapper giving successive-halving code a uniform estimator API.

    Subclasses add a resource-incremental ``update`` step; this base only
    delegates plain fitting and prediction to the wrapped model.

    Args:
        model: underlying predictive model exposing ``fit`` and ``predict``.
    """

    def __init__(self, model):
        # The wrapped model; all real work is delegated to it.
        self.model = model
        # Training-state environment; left unset here, populated by subclasses.
        self.env = None

    def fit(self, X, y):
        """Train the wrapped model on (X, y). Returns nothing."""
        self.model.fit(X, y)

    def predict(self, X):
        """Return the wrapped model's predictions for X."""
        return self.model.predict(X)
# XGBoost-specific successive-halving wrapper.
# NOTE(review): this snippet is truncated by the gist formatting — the body of
# ``update``'s for-loop is missing; only the setup (DMatrix construction and
# the iteration count ``n_iterations - model.n_estimators``) is visible here.
| class SHXGBEstimator(SHBaseEstimator): | |
| def __init__(self,model): | |
| self.model = model | |
# ``env`` tracks early-stopping state across update calls. Initialising
# best_score to -np.infty implies higher scores are better — TODO confirm
# against the ``scoring`` callable actually passed to ``update``.
| self.env = {'best_score':-np.infty,'best_iteration':-1,'earlier_stop':False} | |
| def update(self,Xtrain,ytrain,Xval,yval,scoring,n_iterations): | |
# Re-packs the training data as an xgboost DMatrix for booster updates.
| dtrain = DMatrix(data=Xtrain,label=ytrain) | |
# Presumably boosts one extra round per iteration until the model reaches
# n_iterations total estimators — loop body not visible; verify in the gist.
| for i in range(n_iterations-self.model.n_estimators): | |
| # note: | |
| # this is a get, but the internal booster in XGBClassifier is also updated | |
| # add unit test for controle if future updates |
class SHSklearnEstimator(SHBaseEstimator):
    """Successive-halving wrapper for scikit-learn estimators that support
    warm-started, resource-incremental fitting.

    Args:
        model: scikit-learn estimator exposing ``set_params`` and ``fit``.
        ressource_name: name of the estimator parameter acting as the training
            resource (e.g. ``'n_estimators'``); default None.
    """

    def __init__(self, model, ressource_name=None):
        self.model = model
        self.ressource_name = ressource_name
        self.env = None

    def update(self, Xtrain, ytrain, Xval, yval, scoring, n_iterations):
        """Raise the model's resource budget to ``n_iterations`` and refit.

        ``Xval``, ``yval`` and ``scoring`` are accepted for interface
        compatibility with the other SH estimator wrappers but are unused.
        """
        # BUG FIX: the original called ``self.set_params``, but neither this
        # class nor the visible base class defines ``set_params`` — the call
        # must be forwarded to the wrapped scikit-learn model. ``warm_start``
        # makes the refit continue from the previously trained state.
        self.model.set_params(**{'warm_start': True,
                                 self.ressource_name: n_iterations})
        self.model.fit(Xtrain, ytrain)
# NOTE(review): this block is an exact byte-for-byte duplicate of the
# SHSklearnEstimator definition immediately above it in this file; in Python
# the later definition silently shadows the earlier one. Consider removing one.
| class SHSklearnEstimator(SHBaseEstimator): | |
| def __init__(self,model,ressource_name=None): | |
| self.model = model | |
| self.ressource_name = ressource_name | |
| self.env = None | |
| def update(self,Xtrain,ytrain,Xval,yval,scoring,n_iterations): | |
# NOTE(review): ``self.set_params`` — no ``set_params`` is defined on this
# class or the visible base; presumably this should be
# ``self.model.set_params`` (scikit-learn API) — TODO confirm and fix.
| self.set_params(**{'warm_start':True,self.ressource_name:n_iterations}) | |
| self.model.fit(Xtrain,ytrain) |
# NOTE(review): truncated by the gist formatting — the docstring is cut off
# mid-``Args`` section and the class body is not visible in this chunk.
| class SuccessiveHalving(object): | |
| """Applies successive halving on a model for n configurations with max r resources. | |
| Args: | |
| estimator: object instance with subclass SHBaseEstimator: | |
| estimator wrapper | |
| n: integer: | |
| number of hyperparameter configurations to explore | |
| r: integer: |
// NOTE(review): illustrative Spark ML snippet, not compilable as-is — it uses
// ``...`` placeholders for the pipeline definition and the grid parameters.
// Shows the standard exhaustive ParamGridBuilder usage that the
// RandomGridBuilder below is an alternative to.
| import org.apache.spark.ml.Pipeline | |
| import org.apache.spark.ml.tuning.CrossValidatorModel | |
| import org.apache.spark.ml.param.ParamMap | |
| val pipeline: Pipeline = ... | |
| val paramGrid: Array[ParamMap] = new ParamGridBuilder(). | |
| addGrid(...). | |
| addGrid(...). | |
| build |
// Example: build a random hyperparameter grid (10 sampled configurations)
// for Spark's LogisticRegression. regParam and elasticNetParam are drawn from
// Gamma distributions, threshold from a Gaussian, and standardization is
// sampled from the explicit Array of candidates. Gamma/Gaussian are
// presumably breeze.stats.distributions types — imports are outside this
// snippet; TODO confirm.
| val lr = new LogisticRegression().setMaxIter(10) | |
| val randomGrid = new RandomGridBuilder(10) | |
| .addDistr(lr.regParam,Gamma(0.5,0.1)) | |
| .addDistr(lr.elasticNetParam,Gamma(0.5,0.1)) | |
| .addDistr(lr.threshold,Gaussian(0.5,0.05)) | |
| .addDistr(lr.standardization,Array(true,false)) | |
| .build() |
// Builder collecting a sampling distribution (or explicit candidate Array)
// per Param; ``n`` is the number of random configurations to generate.
// NOTE(review): truncated by the gist formatting — the closing braces of the
// match and the class, and the ``build`` method used in the example above,
// are not visible in this chunk.
| class RandomGridBuilder(n: Int) { | |
| private val paramDistr = mutable.Map.empty[Param[_],Any] | |
// Accepts either a breeze Rand[_] distribution or an Array[_] of explicit
// values; any other argument type raises NotImplementedError. The ``distr``
// parameter is typed Any, so mismatches surface only at runtime.
| def addDistr[T](param: Param[T], distr: Any ): this.type = distr match { | |
| case _ : Rand[_] => {paramDistr.put(param, distr) | |
| this} | |
| case _ : Array[_] => { paramDistr.put(param, distr) | |
| this} | |
| case _ => throw new NotImplementedError("Distribution should be of type breeze.stats.distributions.Rand or an Array") |
// Example: random grid search over LightGBM hyperparameters (mmlspark
// LightGBMClassifier) using the RandomGridBuilder above.
// NOTE(review): truncated by the gist formatting — ``main`` is cut off after
// the first .addDistr call and no closing braces are visible in this chunk.
| import breeze.stats.distributions.{Gamma,Uniform,Poisson} | |
| import com.microsoft.ml.spark.LightGBMClassifier | |
| import tuning.RandomGridBuilder | |
| object example_lgbm{ | |
| def main(args: Array[String]): Unit = { | |
| val lgbm = new LightGBMClassifier() | |
| val randomGrid = new RandomGridBuilder(5) | |
| .addDistr(lgbm.learningRate,Gamma(1.0,0.1)) |
I hereby claim:
To claim this, I am signing this object: