Last active
January 29, 2019 15:34
-
-
Save pedroduartecosta/1d8eadf26c6cf7be1440ce18e28414a0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Linear regression estimator: predicts the "DelayOutputVar" label from the
// assembled "features" vector column.
val lr = new LinearRegression()
  .setLabelCol("DelayOutputVar")
  .setFeaturesCol("features")

// Hyper-parameter grid explored during model selection:
//   - regParam:        regularization strength (0.1 and 0.01)
//   - fitIntercept:    no explicit values are given; per the ParamGridBuilder
//                      docs the boolean overload adds both true and false
//   - elasticNetParam: 0.0 and 1.0 (per the Spark docs, pure L2 and pure L1)
val paramGrid = new ParamGridBuilder()
  .addGrid(lr.regParam, Array(0.1, 0.01))
  .addGrid(lr.fitIntercept)
  .addGrid(lr.elasticNetParam, Array(0.0, 1.0))
  .build()

// Pipeline stages. When categorical features are enabled, the indexers and
// encoders (defined outside this snippet) run before the assembler so that
// their output columns exist by the time the feature vector is built.
val steps: Array[org.apache.spark.ml.PipelineStage] =
  if (useCategorical) {
    categoricalIndexers ++ categoricalEncoders ++ Array(assembler, lr)
  } else {
    Array(assembler, lr)
  }

val pipeline = new Pipeline().setStages(steps)

// Model selection: fits the whole pipeline once per grid entry, using 70% of
// the data passed to fit() for training and the remainder for validation.
// The evaluator is left on its default metric.
val tvs = new TrainValidationSplit()
  .setEstimator(pipeline)
  .setEvaluator(new RegressionEvaluator().setLabelCol("DelayOutputVar"))
  .setEstimatorParamMaps(paramGrid)
  .setTrainRatio(0.7)

// Hold out 30% of the data; the fixed seed keeps the split reproducible.
// `test` is not used within this snippet.
val Array(training, test) = data.randomSplit(Array(0.70, 0.30), seed = 12345)

// Run the grid search on the training portion only.
val model = tvs.fit(training)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment