Skip to content

Instantly share code, notes, and snippets.

@erikerlandson
Last active August 29, 2015 14:05
Show Gist options
  • Save erikerlandson/3bdc2904646141d44f39 to your computer and use it in GitHub Desktop.
Save erikerlandson/3bdc2904646141d44f39 to your computer and use it in GitHub Desktop.
test drop-sampling benchmarks with different collection types
scala> benchmark(p = 0.001, n=1000, m = 10000)
Array
Time using filter: 411
Time using drop: 9
Seq
Time using filter: 425
Time using drop: 3
IndexedSeq
Time using filter: 406
Time using drop: 3
List
Time using filter: 275
Time using drop: 701
Vector
Time using filter: 279
Time using drop: 700
class SampleIteratorDrop[T](var data: Iterator[T], p: Double) extends Iterator[T] {
val q = 1.0-p
val lnq = Math.log(q)
def advance {
val u = Math.max(Math.random(), 1e-10)
val rlen = (Math.log(u)/lnq).toInt
data = data.drop(rlen)
}
advance
override def hasNext: Boolean = data.hasNext
override def next: T = {
val r = data.next
advance
r
}
}
def benchmark(n: Int = 1000, m: Int = 1000, p: Double = 0.5) {
var data:Seq[Int] = (1 to m).toArray
println("\nArray")
var start = System.currentTimeMillis()
(1 to n).foreach { _ => {
data.iterator.filter(_ => Math.random() < p).length
}}
println(s" Time using filter: ${System.currentTimeMillis() - start}")
start = System.currentTimeMillis()
(1 to n).foreach { _ => {
(new SampleIteratorDrop(data.iterator, p)).length
}}
println(s" Time using drop: ${System.currentTimeMillis() - start}")
data = (1 to m).toSeq
println("\nSeq")
start = System.currentTimeMillis()
(1 to n).foreach { _ => {
data.iterator.filter(_ => Math.random() < p).length
}}
println(s" Time using filter: ${System.currentTimeMillis() - start}")
start = System.currentTimeMillis()
(1 to n).foreach { _ => {
(new SampleIteratorDrop(data.iterator, p)).length
}}
println(s" Time using drop: ${System.currentTimeMillis() - start}")
data = (1 to m).toIndexedSeq
println("\nIndexedSeq")
start = System.currentTimeMillis()
(1 to n).foreach { _ => {
data.iterator.filter(_ => Math.random() < p).length
}}
println(s" Time using filter: ${System.currentTimeMillis() - start}")
start = System.currentTimeMillis()
(1 to n).foreach { _ => {
(new SampleIteratorDrop(data.iterator, p)).length
}}
println(s" Time using drop: ${System.currentTimeMillis() - start}")
data = (1 to m).toList
println("\nList")
start = System.currentTimeMillis()
(1 to n).foreach { _ => {
data.iterator.filter(_ => Math.random() < p).length
}}
println(s" Time using filter: ${System.currentTimeMillis() - start}")
start = System.currentTimeMillis()
(1 to n).foreach { _ => {
(new SampleIteratorDrop(data.iterator, p)).length
}}
println(s" Time using drop: ${System.currentTimeMillis() - start}")
data = (1 to m).toVector
println("\nVector")
start = System.currentTimeMillis()
(1 to n).foreach { _ => {
data.iterator.filter(_ => Math.random() < p).length
}}
println(s" Time using filter: ${System.currentTimeMillis() - start}")
start = System.currentTimeMillis()
(1 to n).foreach { _ => {
(new SampleIteratorDrop(data.iterator, p)).length
}}
println(s" Time using drop: ${System.currentTimeMillis() - start}")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment