Created
November 14, 2016 19:07
-
-
Save Ichoran/94b4698905b905934dfd644f6cef7b4b to your computer and use it in GitHub Desktop.
Benchmarks for groupBy / map getOrElseUpdate slowdown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
resolvers += "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots" | |
lazy val root = (project in file(".")).settings( | |
name := "thyme-test", | |
version := "0.1.0", | |
scalaVersion := "2.11.8", | |
crossScalaVersions := Seq("2.11.8", "2.12.0"), | |
libraryDependencies += "com.github.ichoran" %% "thyme" % "0.1.2-SNAPSHOT" | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build project with sbt '+ assembly' before running this script | |
# All comments are the results of benchmarking on my particular machine. Your results may differ. | |
# These commands give consistently slower benchmarks for 2.12 than 2.11 | |
# They run groupBy benchmarks on a 1k list and 1k vector | |
# -XX:MaxInlineLevel=16 results in better performance on 2.12 than 2.11! | |
# If you run only one (e.g. just gbv) then 2.11 wins again but only by ~5% | |
java -Xmx2G -cp target/scala-2.11/thyme-test-assembly-0.1.0.jar GroupByBench gbl gbv | |
java -Xmx2G -cp target/scala-2.12/thyme-test-assembly-0.1.0.jar GroupByBench gbl gbv | |
# These getOrElseUpdate benchmarks are also slower, though not quite enough to explain the above | |
# These are rescued by -XX:MaxInlineLevel=16 to only a ~10% slowdown | |
java -Xmx2G -cp target/scala-2.11/thyme-test-assembly-0.1.0.jar GroupByBench goeu | |
java -Xmx2G -cp target/scala-2.12/thyme-test-assembly-0.1.0.jar GroupByBench goeu | |
# This largely rescues the speed of "goeu", which is a getOrElseUpdate benchmark??? | |
# Maybe map get ends up more heavily optimized when it's used more inlined into the other tests? | |
# They're still all a little slower in 2.12 than 2.11 (~12%). | |
java -Xmx2G -cp target/scala-2.11/thyme-test-assembly-0.1.0.jar GroupByBench goeu gmp gipg | |
java -Xmx2G -cp target/scala-2.12/thyme-test-assembly-0.1.0.jar GroupByBench goeu gmp gipg | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ichi.bench._ | |
object GroupByBench { | |
class Sum(var value: Int = 0) { | |
def ++(): this.type = { value += 1; this } | |
} | |
val a1k = Array.tabulate(1024)(i => ((i*7) % 13).toString) | |
val l1k = a1k.toList | |
val v1k = a1k.toVector | |
val a10k = Array.tabulate(10240)(i => ((i*7) % 13).toString) | |
def gba(a: Array[String]): Int = { | |
val m = a.groupBy(x => x) | |
m(a(util.Random.nextInt(a.length))).head.length | |
} | |
def gbl(l: List[String], a: Array[String]): Int = { | |
val m = l.groupBy(x => x) | |
m(a(util.Random.nextInt(a.length))).head.length | |
} | |
def gbv(v: Vector[String], a: Array[String]): Int = { | |
val m = v.groupBy(x => x) | |
m(a(util.Random.nextInt(a.length))).head.length | |
} | |
def goeu(a: Array[String]): Int = { | |
val m = collection.mutable.Map.empty[String, Sum] | |
var i = 0 | |
while (i < a.length) { | |
m.getOrElseUpdate(a(i), new Sum()).++ | |
i += 1 | |
} | |
m(a(util.Random.nextInt(a.length))).value | |
} | |
def gmp(a: Array[String]): Int = { | |
val m = collection.mutable.Map.empty[String, Sum] | |
var i = 0 | |
while (i < a.length) { | |
m.get(a(i)) match { | |
case None => val s = new Sum(); m += ((a(i), s)); s.++ | |
case Some(s) => s.++ | |
} | |
i += 1 | |
} | |
m(a(util.Random.nextInt(a.length))).value | |
} | |
def gipg(a: Array[String]): Int = { | |
val m = collection.mutable.Map.empty[String, Sum] | |
var i = 0 | |
while (i < a.length) { | |
val x = m.get(a(i)) | |
if (x eq None) { | |
val s = new Sum(); m += ((a(i), s)); s.++ | |
} | |
else x.get.++ | |
i += 1 | |
} | |
m(a(util.Random.nextInt(a.length))).value | |
} | |
def x3[U](f: => U) { f; f; f } | |
def main(args: Array[String]) { | |
val th = Thyme warmed 0.03 | |
// What we could run | |
val possibilities: Map[String, (() => th.Warm[Int], String)] = Map( | |
"goeu" -> ((() => th.Warm{ (1 to 10).map(_ => goeu(a1k)).sum }, "1k array getOrElseUpdate")), | |
"gmp" -> ((() => th.Warm{ (1 to 10).map(_ => gmp(a1k)).sum }, "1k array get/match/+=")), | |
"gipg" -> ((() => th.Warm{ (1 to 10).map(_ => gipg(a1k)).sum }, "1k array get/if/+=/get")), | |
"gba" -> ((() => th.Warm{ (1 to 10).map(_ => gba(a1k)).sum }, "1k array groupBy")), | |
"gbl" -> ((() => th.Warm{ (1 to 10).map(_ => gbl(l1k, a1k)).sum }, "1k list groupBy")), | |
"gbv" -> ((() => th.Warm{ (1 to 10).map(_ => gbv(v1k, a1k)).sum }, "1k vector groupBy")) | |
) | |
// Warm up what we're going to run this time | |
val benchwork = args.distinct.flatMap{ arg => | |
possibilities.get(arg).map{ case (wmf, title) => (wmf(), title) } | |
} | |
if (benchwork.isEmpty) { | |
println("Wait, you didn't ask to run anything.") | |
println("List one or more of the following keys as arguments:") | |
possibilities.foreach{ case (key, (_, title) => | |
println(f" $key%20s - $title") | |
} | |
throw new Exception("No benchmark specified") | |
} | |
// Run it 3x (just to check it's consistent) | |
x3{ | |
var i = 0 | |
while (i < benchwork.length) { | |
val bi = benchwork(i) | |
th pbenchWarm(bi._1, 1, bi._2) | |
i += 1 | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment