Skip to content

Instantly share code, notes, and snippets.

@erikerlandson
Created August 29, 2014 02:55
Show Gist options
  • Save erikerlandson/66b42d96500589f25553 to your computer and use it in GitHub Desktop.
Save erikerlandson/66b42d96500589f25553 to your computer and use it in GitHub Desktop.
Experiment with faster sampling logic
scala> benchmark(p = 0.01, n=1000, m = 100000)
Time using drop: 83
Time using filter: 2849
scala> benchmark(p = 0.01, n=1000, m = 100000)
Time using drop: 90
Time using filter: 2668
scala> benchmark(p = 0.1, n=1000, m = 100000)
Time using drop: 941
Time using filter: 2970
scala> benchmark(p = 0.2, n=1000, m = 100000)
Time using drop: 1665
Time using filter: 2983
scala> benchmark(p = 0.4, n=1000, m = 100000)
Time using drop: 3795
Time using filter: 3386
class SampleIteratorDrop[T](var data: Iterator[T], p: Double) extends Iterator[T] {
val q = 1.0-p
val lnq = Math.log(q)
def advance {
val u = Math.max(Math.random(), 1e-10)
val rlen = (Math.log(Math.random())/lnq).toInt
data = data.drop(rlen)
}
advance
override def hasNext: Boolean = data.hasNext
override def next: T = {
val r = data.next
advance
r
}
}
def benchmark(n: Int = 1000000, m: Int = 1000000, p: Double = 0.5) {
val data = (1 to m).toArray
var start = System.currentTimeMillis()
(1 to n).foreach { _ => {
(new SampleIteratorDrop(data.iterator, p)).length
}}
println(s"Time using drop: ${System.currentTimeMillis() - start}")
start = System.currentTimeMillis()
(1 to n).foreach { _ => {
data.iterator.filter(_ => Math.random() < p).length
}}
println(s"Time using filter: ${System.currentTimeMillis() - start}")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment