Skip to content

Instantly share code, notes, and snippets.

@JRuumis
Created August 15, 2016 23:19
Show Gist options
  • Select an option

  • Save JRuumis/0b14539e2fcf88a08a3241acdf0341c4 to your computer and use it in GitHub Desktop.

Select an option

Save JRuumis/0b14539e2fcf88a08a3241acdf0341c4 to your computer and use it in GitHub Desktop.
When trying to create a DataFrame: Exception in thread "main" java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: file:C:/Developer/Scala%20Projects/Spark2/spark-warehouse
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.RandomForestClassifier
import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorAssembler, VectorIndexer}
import org.apache.spark.sql.SparkSession
//import spark.implicits._
/**
 * Created by Janis Rumnieks on 15/08/2016.
 *
 * Trains a random-forest classifier on the digit-recognition training CSV and prints
 * the number of rows of the test CSV that were scored by the fitted pipeline.
 */
object DigitRecognizer1 {

  /**
   * Entry point: reads the two CSV files, fits the ML pipeline on the training data,
   * scores the test data and prints the prediction count.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val trainDataFile = """/C:/Developer/Kaggle/DigitRecogniser/train.csv"""
    val testDataFile = """/C:/Developer/Kaggle/DigitRecogniser/test.csv"""

    // Name of the target column; it has to come from the header row of train.csv.
    val labelCol = "label"

    // Without an explicit warehouse location Spark 2.0.0 on Windows builds
    // "file:C:/..." from the working directory, which Hadoop rejects with
    // "Relative path in absolute URI". Build a well-formed absolute file URI for the
    // same ./spark-warehouse directory instead.
    val warehouseDir =
      "file:///" + new java.io.File("spark-warehouse")
        .getAbsolutePath
        .replace('\\', '/')
        .stripPrefix("/")

    val spark = SparkSession
      .builder()
      .appName("Spark SQL - Digit Recognition")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", warehouseDir)
      .getOrCreate()

    try {
      // header: column names (including `label`) are taken from the first CSV line
      //         instead of the default _c0, _c1, ...
      // inferSchema: columns are read as numbers; VectorAssembler rejects strings.
      val csvReader = spark.read
        .option("header", "true")
        .option("inferSchema", "true")

      val trainDataFrame = csvReader.csv(trainDataFile)
      val testDataFrame = csvReader.csv(testDataFile)

      // Every non-label column is treated as an input feature.
      // NOTE(review): assumes test.csv carries the same feature columns as train.csv
      // (and no label column) - confirm against the actual files.
      val featureCols = trainDataFrame.columns.filter(_ != labelCol)

      // Packs the individual feature columns into the single vector column that the
      // downstream ML stages consume.
      val assembler = new VectorAssembler()
        .setInputCols(featureCols)
        .setOutputCol("features")

      // Fitted up front because IndexToString below needs the learned label values.
      val labelIndexer = new StringIndexer()
        .setInputCol(labelCol)
        .setOutputCol("indexedLabel")
        .fit(trainDataFrame)

      // Left unfitted: the "features" column only exists after the assembler stage
      // has run, so the Pipeline must fit this stage itself.
      // NOTE(review): features with <= 10 distinct training values become
      // categorical; presumably a test row holding an unseen value for such a
      // feature can fail at transform time on older Spark versions - verify.
      val featureIndexer = new VectorIndexer()
        .setInputCol("features")
        .setOutputCol("indexedFeatures")
        .setMaxCategories(10)

      // The features column must match the VectorIndexer output column name.
      val rf = new RandomForestClassifier()
        .setLabelCol("indexedLabel")
        .setFeaturesCol("indexedFeatures")
        .setNumTrees(10)

      // Maps the predicted label index back to the original label value.
      val labelConverter = new IndexToString()
        .setInputCol("prediction")
        .setOutputCol("predictedLabel")
        .setLabels(labelIndexer.labels)

      val pipeline = new Pipeline()
        .setStages(Array(assembler, labelIndexer, featureIndexer, rf, labelConverter))

      val model = pipeline.fit(trainDataFrame)
      val predictions = model.transform(testDataFrame)

      println(s"Predictions: ${predictions.count()}")
    } finally {
      // Release the local Spark context even when reading or training fails.
      spark.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment