import org.apache.spark.SparkConf
import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.UnsafeRow

// Simple 8-field case class used as the payload for all three serializers.
case class Test(i1: Int, i2: Int, i3: Int, i4: Int, i5: Int, i6: Int, i7: Int, i8: Int)

object EncoderBenchmark {
  val runCount = 10000000

  // Runs the given thunk `runCount` times, repeats that five times, and
  // reports each run's wall-clock time plus the average and records/second.
  def benchmark(a: => Unit): Unit = {
    val results = (1 to 5).map { _ =>
      val startTime = System.nanoTime()
      var i = 0
      while (i < runCount) {
        i += 1
        a
      }
      val endTime = System.nanoTime()
      val runtime = (endTime - startTime).toDouble / 1000000 / 1000 // seconds
      println(s"Runtime: $runtime")
      runtime
    }
    val average = results.sum / results.size
    println(s"average: $average")
    println(s"rec/sec: ${runCount / average}")
  }

  def main(args: Array[String]): Unit = {
    val instance = Test(1, 2, 3, 4, 5, 6, 7, 8)

    // Kryo round-trip (serialize + deserialize) and serialized size in bytes.
    val kryo = new KryoSerializer(new SparkConf()).newInstance()
    benchmark(kryo.deserialize[Test](kryo.serialize(instance)))
    println(s"size: ${kryo.serialize(instance).array().length}")

    // Java serialization round-trip and serialized size in bytes.
    val java = new JavaSerializer(new SparkConf()).newInstance()
    benchmark(java.deserialize[Test](java.serialize(instance)))
    println(s"size: ${java.serialize(instance).array().length}")

    // Catalyst ExpressionEncoder round-trip (object -> UnsafeRow -> object) and row size.
    val encoder = ExpressionEncoder[Test]().defaultBinding
    benchmark(encoder.fromRow(encoder.toRow(instance)))
    println(s"size: ${encoder.toRow(instance).asInstanceOf[UnsafeRow].getSizeInBytes}")
  }
}
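
// To compile and run the benchmark standalone, the Spark core and SQL artifacts
// need to be on the classpath. A minimal build.sbt sketch follows; the versions
// are assumptions (the gist is from the Spark 1.6 era, and depending on when it
// was written against master, a snapshot build may be required for the encoder API).
scalaVersion := "2.10.5"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "1.6.0",
  "org.apache.spark" %% "spark-sql"  % "1.6.0"  // pulls in the catalyst classes used above
)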