Created
March 5, 2016 10:18
-
-
Save tzachz/2e484273eaa85291c7a9 to your computer and use it in GitHub Desktop.
Spark REST API: calculate time per job name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.text.SimpleDateFormat | |
import java.util.Date | |
import org.json4s._ | |
import org.json4s.jackson.JsonMethods.parse | |
import scala.io.Source.fromURL | |
/**
 * Command-line utility that fetches an application's jobs from the Spark REST API
 * and prints the total and average duration (in milliseconds) per job name.
 */
object SparkAppStats {

  /**
   * Jobs endpoint that [[main]] reads from. The `<host>` and `<app-name>` placeholders
   * must be replaced with real values before running. The URL already ends with
   * `/jobs` and is fetched exactly as written.
   */
  val url = "http://<host>:4040/api/v1/applications/<app-name>/jobs"

  /**
   * (partial) representation of a Spark Job object, as extracted from the JSON response.
   *
   * @param jobId          numeric id of the job
   * @param name           job name; used as the grouping key in [[main]]
   * @param submissionTime time the job was submitted
   * @param completionTime time the job completed; presumably absent for jobs that have
   *                       not finished yet (verify against the API)
   * @param stageIds       ids of the stages belonging to this job
   */
  case class SparkJob(jobId: Int, name: String, submissionTime: Date, completionTime: Option[Date], stageIds: List[Int]) {

    /** Milliseconds between submission and completion, or `None` if there is no completion time. */
    def getDurationMillis: Option[Long] = completionTime.map(_.getTime - submissionTime.getTime)
  }

  /** json4s formats that parse dates with the pattern `yyyy-MM-dd'T'HH:mm:ss.SSSZ`. */
  implicit val formats: DefaultFormats = new DefaultFormats {
    override def dateFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ")
  }

  /**
   * Prints total millis and avg millis per job name, e.g.
   * TIME: 182570 AVG: 16597 NAME: count at MyAggregator.scala:132
   * TIME: 230973 AVG: 1297 NAME: parquet at MyRepository.scala:99
   * TIME: 120393 AVG: 2188 NAME: collect at MyCollector.scala:30
   *
   * Jobs without a completion time are ignored.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // `url` already points at the jobs endpoint; appending "/jobs" again would
    // request ".../jobs/jobs".
    val source = fromURL(url)
    // Close the underlying connection even if reading the response fails.
    val json = try source.mkString finally source.close()

    // Keep only jobs that have a duration, pairing each with its name; this avoids
    // calling `.get` on the Option later.
    val completedDurations: List[(String, Long)] = parse(json)
      .extract[List[SparkJob]]
      .flatMap(job => job.getDurationMillis.map(millis => job.name -> millis))

    completedDurations
      .groupBy { case (name, _) => name }
      .foreach { case (name, entries) =>
        val time = entries.map { case (_, millis) => millis }.sum
        // `entries` is never empty: groupBy only creates groups that have at least one element.
        val count = entries.size
        println(s"TIME: $time\tAVG: ${time / count}\tNAME: $name")
      }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment