Skip to content

Instantly share code, notes, and snippets.

@tstone
Last active August 29, 2015 14:02
Show Gist options
  • Save tstone/564f62c2bd6657b56f51 to your computer and use it in GitHub Desktop.
Save tstone/564f62c2bd6657b56f51 to your computer and use it in GitHub Desktop.
A little performance test for two approaches to resolving duplicates between two lists
// -- Measuring ---
/**
 * Runs `block` repeatedly and reports the mean wall-clock duration of a run.
 *
 * @param label descriptive name of the measurement (not used by the body)
 * @param count number of timed runs to average over
 * @param block by-name expression to measure; its result is discarded
 * @return the summed `System.nanoTime` deltas divided by `count` (integer division)
 */
def time[A](label: String, count: Int = 100)(block: => A): Long = {
  // One untimed evaluation first, so the initial (compiling) run on the
  // console does not end up in the measurements.
  block

  // Collect one elapsed-time sample per run.
  val samples = for (_ <- 1 to count) yield {
    val startedAt = System.nanoTime()
    block
    val finishedAt = System.nanoTime()
    finishedAt - startedAt
  }

  samples.sum / count
}
/** Nested record carrying the integer `id` that the tests compare on. */
case class SubData(
  id: Int
)

/** A string `value` paired with its [[SubData]]. */
case class Data(
  value: String,
  sub: SubData
)
// -- Setup ---
// Five sub-records with ids 1 through 5.
val sub1: SubData = SubData(id = 1)
val sub2: SubData = SubData(id = 2)
val sub3: SubData = SubData(id = 3)
val sub4: SubData = SubData(id = 4)
val sub5: SubData = SubData(id = 5)

// First input list: foo/bar/baz on sub ids 1, 2, 3.
val dataSet1: Seq[Data] = Seq(
  Data(value = "foo", sub = sub1),
  Data(value = "bar", sub = sub2),
  Data(value = "baz", sub = sub3)
)

// Second input list: its "foo" entry (sub id 1) also appears in dataSet1.
val dataSet2: Seq[Data] = Seq(
  Data(value = "lorem", sub = sub4),
  Data(value = "foo", sub = sub1),
  Data(value = "ipsum", sub = sub5)
)
// -- Test1 ---
/**
 * Test 1: times de-duplication by filtering `dataSet2` before concatenating.
 *
 * Entries of `dataSet2` are dropped when their sub id is 0 or already occurs
 * among the sub ids of `dataSet1`; the survivors are appended to `dataSet1`.
 *
 * @return the average duration reported by `time` for the block
 */
def filterFirst: Long = time("filter first") {
  val idsFromSet1 = dataSet1.map(_.sub.id)
  val dataSet2Prime = dataSet2.filter { d =>
    d.sub.id != 0 && !idsFromSet1.contains(d.sub.id)
  }
  // Append the filtered list: concatenating the raw dataSet2 would keep the
  // duplicates and make the filter above dead work.
  dataSet1 ++ dataSet2Prime
}
// -- Test2 ---
/**
 * Test 2: times de-duplication by concatenating first and calling `distinct`
 * on the combined list. Entries of `dataSet2` whose sub id is 0 are dropped
 * before the concatenation.
 */
def distinctAfter = time("distinct after") {
  val combined = dataSet1 ++ dataSet2.filter(d => d.sub.id != 0)
  combined.distinct
}
@tstone
Copy link
Author

tstone commented May 30, 2014

Results on my machine (average nanoseconds per run, over 100 runs):

filterFirst = 5650
distinctAfter = 19830

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment