package wordcount

/**
  * Created by itversity on 25/03/17.
  *
  * Submit to Spark with, for example:
  * spark-submit \
  *   --class wordcount.WordCount \
  *   /Users/itversity/IdeaProjects/sands/target/scala-2.10/sands_2.10-1.0.jar \
  *   dev /Users/itversity/Research/data/wordcount.txt /Users/itversity/Research/data/wc
  */
import org.apache.hadoop.fs._
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.spark.{SparkConf, SparkContext}
object WordCount {
  def main(args: Array[String]): Unit = {
    // First argument selects the environment block (e.g. dev) read from the Typesafe config
    val executionEnvironment = args(0)
    val props: Config = ConfigFactory.load()
    val conf = new SparkConf().
      setAppName("Word Count").
      setMaster(props.getConfig(executionEnvironment).getString("executionMode"))
    val sc = new SparkContext(conf)
    val fs = FileSystem.get(sc.hadoopConfiguration)

    val inputPath = args(1)
    val outputPath = args(2)
    if (!fs.exists(new Path(inputPath))) {
      println("Input path does not exist")
    } else {
      // Remove any existing output directory so saveAsTextFile does not fail
      if (fs.exists(new Path(outputPath)))
        fs.delete(new Path(outputPath), true)
      sc.textFile(inputPath).
        flatMap(rec => rec.split(" ")).
        map(rec => (rec.replace(",", ""), 1)). // strips commas; use map(rec => (rec, 1)) to keep punctuation in the counts
        reduceByKey(_ + _).
        map(rec => rec.productIterator.mkString("\t")). // alternative: map(rec => rec._1 + "\t" + rec._2)
        saveAsTextFile(outputPath)
    }
  }
}
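For reference, ConfigFactory.load() resolves an application.conf (or application.properties) from the classpath. A minimal sketch of what it could contain for this program; only the dev key and the executionMode property are implied by the code above, while the prod block and the master URLs are illustrative assumptions:

# application.conf (HOCON) -- only "dev" and "executionMode" come from the code above
dev {
  executionMode = "local[*]"      # assumed: run locally during development
}
prod {
  executionMode = "yarn-client"   # assumed: use whatever master the cluster expects
}

The first program argument (dev in the spark-submit example above) picks the block, and its executionMode value is passed to setMaster.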
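The jar referenced in the spark-submit example (sands_2.10-1.0.jar) would come from sbt package. A rough build.sbt sketch, assuming Spark 1.6.x on Scala 2.10 (the exact versions are assumptions, not stated in the gist); spark-core is marked "provided" because spark-submit supplies it at runtime, and the Hadoop FileSystem classes come in transitively through spark-core:

// versions are assumptions; align them with the cluster's Spark and Scala versions
name := "sands"
version := "1.0"
scalaVersion := "2.10.6"
libraryDependencies += "org.apache.spark" %% "spark-core" % "1.6.3" % "provided"
libraryDependencies += "com.typesafe" % "config" % "1.3.1"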