Skip to content

Instantly share code, notes, and snippets.

@dgadiraju
Last active July 16, 2017 19:04
Show Gist options
  • Select an option

  • Save dgadiraju/b27d54565daefaf53491df19c123e8e7 to your computer and use it in GitHub Desktop.

Select an option

Save dgadiraju/b27d54565daefaf53491df19c123e8e7 to your computer and use it in GitHub Desktop.
package wordcount
import org.apache.spark.{SparkConf,SparkContext}
import org.apache.hadoop.fs._
/**
  * Spark word-count job: reads a text input, counts the occurrences of every
  * space-separated word, and writes one `word<TAB>count` line per distinct word.
  *
  * Created by itversity on 31/05/17.
  */
object WordCount {

  /**
    * Program entry point.
    *
    * @param args `args(0)` is the input path, `args(1)` is the output path;
    *             any further arguments are ignored. When fewer than two
    *             arguments are supplied a usage message is printed to stderr
    *             and the JVM exits with status 1.
    */
  def main(args: Array[String]): Unit = args match {
    case Array(inputPath, outputPath, _*) =>
      run(inputPath, outputPath)
    case _ =>
      // Fail fast, before a SparkContext is started, with an actionable message
      // instead of an ArrayIndexOutOfBoundsException.
      System.err.println("Usage: WordCount <inputPath> <outputPath>")
      sys.exit(1)
  }

  /**
    * Runs the word count from `inputPath` into `outputPath`.
    *
    * If the input path does not exist a message is printed and nothing is
    * written. If the output path already exists it is deleted recursively
    * before the job writes its result.
    */
  private def run(inputPath: String, outputPath: String): Unit = {
    // Default to a local master only when none was supplied (e.g. through
    // spark-submit --master), so the same jar also runs on a cluster.
    val conf = new SparkConf()
      .setAppName("Word Count")
      .setIfMissing("spark.master", "local")
    val sc = new SparkContext(conf)

    try {
      val input = new Path(inputPath)
      val output = new Path(outputPath)
      // Resolve the file system from each path so that paths whose scheme
      // differs from the default file system are checked against the right one.
      val inputFs = input.getFileSystem(sc.hadoopConfiguration)
      val outputFs = output.getFileSystem(sc.hadoopConfiguration)

      if (!inputFs.exists(input)) {
        println("Input path does not exist")
      } else {
        // saveAsTextFile refuses to overwrite, so clear any previous output
        // (second argument = recursive delete).
        if (outputFs.exists(output)) {
          outputFs.delete(output, true)
        }

        sc.textFile(inputPath)
          .flatMap(_.split(" "))
          // Blank lines and repeated spaces yield empty tokens; they are not words.
          .filter(_.nonEmpty)
          .map((_, 1))
          .reduceByKey(_ + _)
          .map { case (word, count) => s"$word\t$count" }
          .saveAsTextFile(outputPath)
      }
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment