Created
August 15, 2016 23:19
-
-
Save JRuumis/0b14539e2fcf88a08a3241acdf0341c4 to your computer and use it in GitHub Desktop.
When trying to create a DataFrame:
Exception in thread "main" java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: file:C:/Developer/Scala%20Projects/Spark2/spark-warehouse
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.RandomForestClassifier
import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorAssembler, VectorIndexer}
import org.apache.spark.sql.SparkSession
//import spark.implicits._
/**
  * Created by Janis Rumnieks on 15/08/2016.
  *
  * Trains a random-forest classifier for the Kaggle "Digit Recognizer"
  * data set using the Spark ML pipeline API, then counts predictions on
  * the unlabeled test set.
  */
object DigitRecognizer1 {

  def main(args: Array[String]): Unit = {
    // Forward-slash form keeps java.net.URI happy with Windows drive letters.
    val trainDataFile = """/C:/Developer/Kaggle/DigitRecogniser/train.csv"""
    val testDataFile = """/C:/Developer/Kaggle/DigitRecogniser/test.csv"""

    val spark = SparkSession
      .builder()
      .appName("Spark SQL - Digit Recognition")
      .master("local[*]")
      // Fix for the reported "Relative path in absolute URI:
      // file:C:/.../spark-warehouse" failure (SPARK-15899): on Windows the
      // default derived warehouse path is not a valid URI, so supply an
      // explicit well-formed file: URI instead.
      .config("spark.sql.warehouse.dir", new java.io.File("spark-warehouse").toURI.toString)
      .getOrCreate()

    // The Kaggle CSVs carry a header row ("label", "pixel0", ...). Without
    // header=true the columns come back as _c0.._c784 and the "label" column
    // used below would not exist; inferSchema=true yields numeric pixel
    // columns, which VectorAssembler requires.
    val trainDataFrame = spark.read
      .option("header", "true")
      .option("inferSchema", "true")
      .csv(trainDataFile)
    val testDataFrame = spark.read
      .option("header", "true")
      .option("inferSchema", "true")
      .csv(testDataFile)

    // Collapse the individual pixel columns into the single "features"
    // vector column that VectorIndexer/RandomForestClassifier consume —
    // without this stage no "features" column exists and the indexer
    // below cannot be fitted.
    val assembler = new VectorAssembler()
      .setInputCols(trainDataFrame.columns.filter(_ != "label"))
      .setOutputCol("features")
    val assembledTrain = assembler.transform(trainDataFrame)

    val labelIndexer = new StringIndexer()
      .setInputCol("label")
      .setOutputCol("indexedLabel")
      .fit(trainDataFrame)

    val featureIndexer = new VectorIndexer()
      .setInputCol("features")
      .setOutputCol("indexedFeatures")
      .setMaxCategories(10)
      .fit(assembledTrain)

    val rf = new RandomForestClassifier()
      .setLabelCol("indexedLabel")
      // Was "indexedPixel" — no stage produces that column; the feature
      // indexer writes "indexedFeatures".
      .setFeaturesCol("indexedFeatures")
      .setNumTrees(10)

    // Map numeric predictions back to the original label values.
    val labelConverter = new IndexToString()
      .setInputCol("prediction")
      .setOutputCol("predictedLabel")
      .setLabels(labelIndexer.labels)

    // NOTE(review): test.csv has no "label" column; the pre-fitted
    // StringIndexerModel is expected to skip transformation when its input
    // column is absent — confirm against the Spark version in use.
    val pipeline = new Pipeline()
      .setStages(Array(assembler, labelIndexer, featureIndexer, rf, labelConverter))

    val model = pipeline.fit(trainDataFrame)
    val predictions = model.transform(testDataFrame)
    //predictions.select("predictedLabel", "label", "features").show(5)
    println(s"Predictions: ${predictions.count()}")
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment