Spark read write test
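A small Spark job that reads a text file as a Dataset, inflates it by repeatedly unioning it with itself, appends a character to every line, and writes the result back out as a single text file: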
package com.byond.sparkperformance

import org.apache.spark.sql.{Dataset, SaveMode, SparkSession}

object ReadWriteTest {

  def main(args: Array[String]): Unit = {
    // Read the input path and multiplication factor from the CLI,
    // defaulting to the bundled sample file and a factor of 1.
    val (file: String, factor: Int) = args match {
      case Array(f, fc) => (f, fc.toInt)
      case _            => ("target/classes/sample.txt", 1)
    }

    val spark = SparkSession
      .builder
      .getOrCreate()

    import spark.implicits._

    val df = spark.read.textFile(file)
    val count = df.count()
    println(s"before multiply count: $count")

    // Inflate the dataset, then touch every row so the union is actually materialized.
    val multiplied = multiply(df, factor).map(s => s + "1")
    println(s"after multiply count: ${multiplied.count}")

    // Collapse to a single partition and write next to the input file.
    multiplied.coalesce(1).write.mode(SaveMode.Overwrite).text(file + ".transformed")
  }

  // Union the dataset with itself `times` times, yielding (times + 1) x the original rows.
  def multiply[T](ds: Dataset[T], times: Int): Dataset[T] =
    (1 to times).foldLeft(ds)((acc, _) => acc.union(ds))
}
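The gist does not include a build definition; a minimal build.sbt for packaging the job into the rw.jar referenced below might look like the following sketch. The project name, organization, and version are assumptions; the Spark and Scala versions are inferred from the spark-2.3.2-bin-hadoop2.7 distribution used in the spark-submit command.

name := "spark-read-write-test"   // assumed project name
organization := "com.byond"
version := "0.1.0-SNAPSHOT"       // assumed version

scalaVersion := "2.11.12"         // Spark 2.3.x prebuilt binaries target Scala 2.11

// spark-sql is "provided" because spark-submit supplies it on the cluster at runtime
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.3.2" % "provided"

Running sbt package then produces a thin jar under target/scala-2.11/ that can be copied to the /opt/spark/jars/rw.jar path used below.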
../spark-2.3.2-bin-hadoop2.7/bin/spark-submit \
  --deploy-mode cluster \
  --master spark://integrity-hadoop-client-spark-master:6066 \
  --class com.byond.sparkperformance.ReadWriteTest \
  /opt/spark/jars/rw.jar <file to read> <multiply factor>
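For a quick sanity check without the standalone cluster, the same class can also be run with a local master; this is a sketch, and the input path and factor are example values:

../spark-2.3.2-bin-hadoop2.7/bin/spark-submit \
  --master "local[*]" \
  --class com.byond.sparkperformance.ReadWriteTest \
  /opt/spark/jars/rw.jar target/classes/sample.txt 3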