Skip to content

Instantly share code, notes, and snippets.

@yujuwon
Created February 22, 2018 11:47
Show Gist options
  • Select an option

  • Save yujuwon/b41a62a7a391ae1e50f83c76d310308c to your computer and use it in GitHub Desktop.

Select an option

Save yujuwon/b41a62a7a391ae1e50f83c76d310308c to your computer and use it in GitHub Desktop.
package com.test
import org.apache.spark._
import org.apache.spark.SparkContext._
/**
 * Minimal Spark word-count job.
 *
 * Positional command-line arguments (all optional):
 *   - `args(0)`: Spark master URL; defaults to `"local"`.
 *   - `args(1)`: path of the text input to read; when absent a small
 *     built-in sample data set is used instead.
 *   - `args(2)`: output path for the per-word counts; when absent the counts
 *     are collected to the driver and printed to standard output.
 */
object WordCount {

  /**
   * Entry point: builds the SparkContext, counts the words and emits the result.
   *
   * @param args positional arguments as described on [[WordCount]]
   */
  def main(args: Array[String]): Unit = {
    // `lift` yields None for a missing position, so no manual length checks are needed.
    val master = args.lift(0).getOrElse("local")
    val sc = new SparkContext(master, "WordCount", System.getenv("SPARK_HOME"))
    try {
      val input = args.lift(1) match {
        case Some(inputPath) => sc.textFile(inputPath)
        case None => sc.parallelize(List("pandas", "i like pandas"))
      }
      // Tokenise on single spaces, one output record per token.
      val words = input.flatMap(line => line.split(" "))
      args.lift(2) match {
        case Some(outputPath) =>
          // Distributed aggregation; the result is written out by the executors.
          val counts = words.map(word => (word, 1)).reduceByKey(_ + _)
          counts.saveAsTextFile(outputPath)
        case None =>
          // countByValue brings the whole word -> count map back to the driver.
          val wc = words.countByValue()
          println(wc.mkString(","))
      }
    } finally {
      // Always shut the context down, even when the job fails part-way.
      sc.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment