@darionyaphet
Created March 3, 2017 05:42
Simple WordCount SparkJobServer Example
package org.darion.yaphet.jobs

import com.typesafe.config.Config
import org.apache.spark.SparkContext
import spark.jobserver.{SparkJob, SparkJobInvalid, SparkJobValid, SparkJobValidation}

import scala.util.Try
/**
  * Created by darionwang on 2017/3/3.
  */
object WordCountExample extends SparkJob {
  /**
    * This is the entry point for the Spark Job Server to execute Spark jobs.
    * This function should create or reuse RDDs and return the result at the end, which the
    * Job Server will cache or display.
    *
    * @param sc        a SparkContext or similar for the job. May be reused across jobs.
    * @param jobConfig the Typesafe Config object passed into the job request
    * @return the job result
    */
  override def runJob(sc: SparkContext, jobConfig: Config): Any = {
    // Split the input string on spaces and count the occurrences of each word.
    val dataSet = sc.parallelize(jobConfig.getString("input.string").split(" ").toSeq)
    dataSet.map((_, 1)).reduceByKey(_ + _).collect().toMap
  }
  /**
    * This method is called by the job server to allow jobs to validate their input and reject
    * invalid job requests. If SparkJobInvalid is returned, then the job server returns 400
    * to the user.
    * NOTE: this method should return very quickly. If it responds slowly then the job server
    * may time out trying to start this job.
    *
    * @return either SparkJobValid or SparkJobInvalid
    */
  override def validate(context: SparkContext, config: Config): SparkJobValidation = {
    // Cheap check only: reject the request if the input.string config key is missing.
    Try(config.getString("input.string"))
      .map(_ => SparkJobValid)
      .getOrElse(SparkJobInvalid("No input.string config param"))
  }
}
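
The job server calls validate first and then runJob, but the object can also be smoke-tested without a running job server. The sketch below is a minimal local driver, assuming Spark in local mode; the LocalRunner object and the sample input string are illustrative additions, not part of the gist.

package org.darion.yaphet.jobs

import com.typesafe.config.ConfigFactory
import org.apache.spark.{SparkConf, SparkContext}
import spark.jobserver.SparkJobValid

object LocalRunner {
  def main(args: Array[String]): Unit = {
    // Run Spark in-process so no cluster or job server is needed.
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("WordCountExampleTest"))

    // HOCON concatenates the unquoted tokens into the single string "a b c a b see".
    val config = ConfigFactory.parseString("input.string = a b c a b see")

    // validate should accept the config, after which runJob returns the word counts.
    assert(WordCountExample.validate(sc, config) == SparkJobValid)
    println(WordCountExample.runJob(sc, config)) // e.g. Map(a -> 2, b -> 2, c -> 1, see -> 1)

    sc.stop()
  }
}

Against a running job server, the jar containing the job is typically uploaded with a POST to /jars/<appName>, and the job is then started with a POST to /jobs?appName=<appName>&classPath=org.darion.yaphet.jobs.WordCountExample, with input.string passed in the request body.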