Tzach Zohar (tzachz)
@tzachz
tzachz / UserValidator.java
Created June 6, 2015 14:43
Functions should not have side effects (from Clean Code, p.44)
public class UserValidator {
    private Cryptographer cryptographer;

    public boolean checkPassword(String userName, String password) {
        User user = UserGateway.findByName(userName);
        if (user != User.NULL) {
            String codedPhrase = user.getPhraseEncodedByPassword();
            String phrase = cryptographer.decrypt(codedPhrase, password);
            if ("Valid Password".equals(phrase)) {
                Session.initialize(); // the hidden side effect Martin warns about
                return true;
            }
        }
        return false;
    }
}
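The problem Martin points out: checkPassword promises a boolean answer but also initializes the session, so a caller who merely wants to verify credentials risks wiping session state (a temporal coupling). One hedged way to untangle it (this refactor is mine, not the book's listing) is to keep the check pure and let the caller own the side effect:

public boolean checkPassword(String userName, String password) {
    User user = UserGateway.findByName(userName);
    if (user == User.NULL) {
        return false;
    }
    String phrase = cryptographer.decrypt(user.getPhraseEncodedByPassword(), password);
    return "Valid Password".equals(phrase);
}

// Call site: the coupling is now explicit.
if (userValidator.checkPassword(userName, password)) {
    Session.initialize();
}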
@tzachz
tzachz / StaticClass.java
Created September 1, 2015 12:59
Java behavior when a static initializer throws an exception
public class StaticClass {
    static {
        throwsException(); // runs once, on first initialization of the class
    }

    private static void throwsException() {
        throw new RuntimeException("bam!");
    }
}
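A small driver (my addition, not part of the gist) makes the behavior observable: the first use of the class fails with an ExceptionInInitializerError wrapping the RuntimeException, and every later use fails with NoClassDefFoundError, because a class whose static initializer threw is left in an erroneous state and initialization is never retried.

public class Main {
    public static void main(String[] args) {
        try {
            new StaticClass(); // first use triggers static initialization
        } catch (Throwable t) {
            System.out.println(t); // java.lang.ExceptionInInitializerError
        }
        try {
            new StaticClass(); // initialization is not retried
        } catch (Throwable t) {
            System.out.println(t); // java.lang.NoClassDefFoundError
        }
    }
}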
@tzachz
tzachz / SparkAppStats.scala
Created November 4, 2015 19:47
Spark REST API usage example: shuffle memory totals
import org.json4s._
import org.json4s.jackson.JsonMethods.parse
import scala.io.Source.fromURL
object SparkAppStats {

  /** (partial) representation of a Spark Stage object */
  case class SparkStage(name: String, shuffleWriteBytes: Long, memoryBytesSpilled: Long, diskBytesSpilled: Long)
}
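The listing is cut off after the case class. A sketch of how it is typically driven: the endpoint is Spark's documented /api/v1/applications/<app-id>/stages, the summing code is my reconstruction, and <host> and <app-id> are placeholders.

object SparkAppStatsMain extends App {
  implicit val formats = DefaultFormats
  val url = "http://<host>:4040/api/v1/applications/<app-id>/stages"
  // json4s extracts the documented stage fields; unknown JSON fields are ignored
  val stages = parse(fromURL(url).mkString).extract[List[SparkAppStats.SparkStage]]
  println(s"total shuffle write:  ${stages.map(_.shuffleWriteBytes).sum} bytes")
  println(s"total memory spilled: ${stages.map(_.memoryBytesSpilled).sum} bytes")
  println(s"total disk spilled:   ${stages.map(_.diskBytesSpilled).sum} bytes")
}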
@tzachz
tzachz / CounterBackedAccumulatorUtil.scala
Created November 4, 2015 21:06
Creating a Metrics Counter backed by a Spark Accumulator
package com.kenshoo.kripke.core
import com.yammer.metrics.Metrics
import com.yammer.metrics.core.{MetricName, Counter}
import org.apache.spark.Accumulator
import org.apache.spark.rdd.RDD
import scala.reflect.ClassTag
object CounterBackedAccumulatorUtil {
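  // The listing is truncated here; what follows is a reconstruction sketch,
  // not necessarily the gist's exact code. Idea: count elements flowing
  // through an RDD with a Spark Accumulator, and expose the accumulator's
  // current value as a Yammer metric (a Gauge here, which gives the same
  // read-side behavior as a Counter). Method and metric names are illustrative.
  def countSilently[T: ClassTag](rdd: RDD[T], klass: Class[_], name: String): RDD[T] = {
    val acc: Accumulator[Long] = rdd.sparkContext.accumulator(0L, name)
    Metrics.defaultRegistry().newGauge(new MetricName(klass, name), new com.yammer.metrics.core.Gauge[Long] {
      override def value(): Long = acc.value // read on the driver
    })
    rdd.map { elem => acc += 1L; elem } // count as a pass-through, no extra action
  }
}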
@tzachz
tzachz / SparkContextInstrumentor.scala
Created November 4, 2015 22:20
Expose Spark's cache memory status as Metrics Gauges
import com.yammer.metrics.Metrics
import com.yammer.metrics.core.Gauge
import org.apache.spark.SparkContext
/**
* Created by tzachz on 10/21/15
*/
object SparkContextInstrumentor {

  def instrument(context: SparkContext): SparkContext = {
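    // The listing is truncated here; a reconstruction sketch follows (gauge
    // names are illustrative). getExecutorMemoryStatus maps each executor to
    // a (maxMemory, remainingMemory) pair, so summing each side gives the
    // cluster-wide cache capacity and its free portion.
    Metrics.defaultRegistry().newGauge(classOf[SparkContext], "total-cache-memory", new Gauge[Long] {
      override def value(): Long = context.getExecutorMemoryStatus.values.map(_._1).sum
    })
    Metrics.defaultRegistry().newGauge(classOf[SparkContext], "free-cache-memory", new Gauge[Long] {
      override def value(): Long = context.getExecutorMemoryStatus.values.map(_._2).sum
    })
    context // return the same context so calls can be chained
  }
}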
@tzachz
tzachz / SparkAppStats.scala
Created March 5, 2016 10:18
Spark REST API: calculate time per job name
import java.text.SimpleDateFormat
import java.util.Date
import org.json4s._
import org.json4s.jackson.JsonMethods.parse
import scala.io.Source.fromURL
object SparkAppStats {

  val url = "http://<host>:4040/api/v1/applications/<app-name>/jobs"
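  // The listing is truncated here; the rest is a reconstruction sketch. Field
  // names follow the documented /api/v1 jobs payload; the aggregation is my own.
  implicit val formats = DefaultFormats

  case class SparkJob(jobId: Int, name: String, submissionTime: String, completionTime: Option[String])

  private val fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'")
  private def millis(ts: String): Long = fmt.parse(ts).getTime

  def main(args: Array[String]): Unit = {
    val jobs = parse(fromURL(url).mkString).extract[List[SparkJob]]
    jobs.filter(_.completionTime.isDefined)
      .groupBy(_.name)
      .mapValues(_.map(j => millis(j.completionTime.get) - millis(j.submissionTime)).sum)
      .foreach { case (name, totalMs) => println(s"$name: $totalMs ms") }
  }
}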
@tzachz
tzachz / LocalDiskHealthCheck.scala
Last active July 22, 2016 04:07
Apache Spark: Local Disk HealthCheck
import java.io.{File, IOException}
import java.net.InetAddress
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.slf4j.{Logger, LoggerFactory}
import scala.util.{Failure, Try}
object LocalDiskHealthCheck {
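  // The listing is truncated here; the rest is a reconstruction sketch (method
  // and message names are illustrative). The idea: schedule one tiny task per
  // partition so every executor gets work, try writing a temp file as a proxy
  // for local-disk health, and log the hostname of any machine that fails.
  private val log: Logger = LoggerFactory.getLogger(getClass)

  def run(sc: SparkContext, numTasks: Int = 1000): Unit = {
    val failedHosts: RDD[String] = sc.parallelize(1 to numTasks, numTasks).mapPartitions { _ =>
      Try {
        val probe = File.createTempFile("disk-healthcheck", ".tmp")
        probe.delete()
      } match {
        case Failure(_: IOException) => Iterator(InetAddress.getLocalHost.getHostName)
        case _                       => Iterator.empty
      }
    }
    failedHosts.distinct().collect().foreach(host => log.error(s"Local disk failure on $host"))
  }
}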
@tzachz
tzachz / CombineMaps.scala
Last active January 26, 2023 04:31
Apache Spark UserDefinedAggregateFunction combining maps
import org.apache.spark.SparkContext
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, Row, SQLContext}
/**
  * UDAF combining maps, overriding any duplicate key with "latest" value
  * @param keyType   DataType of the map's keys
  * @param valueType DataType of the map's values
  * @param merge     function to merge values of identical keys
  */
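The listing stops at the doc comment. A minimal class honoring that contract might look like the sketch below, written against the pre-Spark-3 UserDefinedAggregateFunction API the imports point to; the parameter name mergeValues and all member bodies are my reconstruction, not the gist's code.

class CombineMaps[T](keyType: DataType, valueType: DataType, mergeValues: (T, T) => T)
  extends UserDefinedAggregateFunction {

  private val mapType = MapType(keyType, valueType)
  override def inputSchema: StructType = new StructType().add("map", mapType)
  override def bufferSchema: StructType = new StructType().add("combined", mapType)
  override def dataType: DataType = mapType
  override def deterministic: Boolean = true
  override def initialize(buffer: MutableAggregationBuffer): Unit = buffer(0) = Map.empty[Any, Any]
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit = putAll(buffer, input)
  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = putAll(buffer1, buffer2)
  override def evaluate(buffer: Row): Any = buffer.getMap[Any, Any](0)

  // fold the incoming map into the buffer; duplicate keys resolved via mergeValues
  private def putAll(buffer: MutableAggregationBuffer, row: Row): Unit = {
    val incoming = row.getMap[Any, Any](0)
    if (incoming != null) {
      buffer(0) = incoming.foldLeft(buffer.getMap[Any, Any](0).toMap) { case (acc, (k, v)) =>
        acc + (k -> acc.get(k).fold(v)(old => mergeValues(old.asInstanceOf[T], v.asInstanceOf[T])))
      }
    }
  }
}

// Hypothetical usage, summing the values of duplicate keys:
//   val combineMaps = new CombineMaps[Int](StringType, IntegerType, _ + _)
//   df.agg(combineMaps(col("m")))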
@tzachz
tzachz / dependency-report.gradle
Created November 17, 2016 14:37
Gradle: multi-project dependency graph (supports depth > 2)
// Inspired by https://gist.github.com/abesto/cdcdd38263eacf1cbb51
// Task creates a .dot file with all inter-module dependencies
// Supports any depth of nested modules
task moduleDependencyReport {
    doLast {
        def file = new File("project-dependencies.dot")
        file.delete()
        file << "digraph {\n"
        file << "splines=ortho\n"
        // (the listing is truncated here; one plausible completion emits an
        // edge for every inter-project dependency, at any nesting depth)
        allprojects.each { proj ->
            proj.configurations*.dependencies*.withType(ProjectDependency).flatten().each { dep ->
                file << "\"${proj.path}\" -> \"${dep.dependencyProject.path}\"\n"
            }
        }
        file << "}\n"
    }
}
(separate snippet; its gist header was not captured) Starting an Anodot metrics reporter:
AnodotReporterConfiguration anodotConf =
        new DefaultAnodotReporterConfiguration("your-token", 60, "https://api.anodot.com/api/v1/metrics");
Anodot3ReporterBuilder.builderFor(anodotConf)
        .build(metricRegistry)
        .start();