Last active
August 29, 2015 14:05
-
-
Save erikerlandson/3bdc2904646141d44f39 to your computer and use it in GitHub Desktop.
test drop-sampling benchmarks with different collection types
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
scala> benchmark(p = 0.001, n=1000, m = 10000) | |
Array | |
Time using filter: 411 | |
Time using drop: 9 | |
Seq | |
Time using filter: 425 | |
Time using drop: 3 | |
IndexedSeq | |
Time using filter: 406 | |
Time using drop: 3 | |
List | |
Time using filter: 275 | |
Time using drop: 701 | |
Vector | |
Time using filter: 279 | |
Time using drop: 700 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class SampleIteratorDrop[T](var data: Iterator[T], p: Double) extends Iterator[T] { | |
val q = 1.0-p | |
val lnq = Math.log(q) | |
def advance { | |
val u = Math.max(Math.random(), 1e-10) | |
val rlen = (Math.log(u)/lnq).toInt | |
data = data.drop(rlen) | |
} | |
advance | |
override def hasNext: Boolean = data.hasNext | |
override def next: T = { | |
val r = data.next | |
advance | |
r | |
} | |
} | |
def benchmark(n: Int = 1000, m: Int = 1000, p: Double = 0.5) { | |
var data:Seq[Int] = (1 to m).toArray | |
println("\nArray") | |
var start = System.currentTimeMillis() | |
(1 to n).foreach { _ => { | |
data.iterator.filter(_ => Math.random() < p).length | |
}} | |
println(s" Time using filter: ${System.currentTimeMillis() - start}") | |
start = System.currentTimeMillis() | |
(1 to n).foreach { _ => { | |
(new SampleIteratorDrop(data.iterator, p)).length | |
}} | |
println(s" Time using drop: ${System.currentTimeMillis() - start}") | |
data = (1 to m).toSeq | |
println("\nSeq") | |
start = System.currentTimeMillis() | |
(1 to n).foreach { _ => { | |
data.iterator.filter(_ => Math.random() < p).length | |
}} | |
println(s" Time using filter: ${System.currentTimeMillis() - start}") | |
start = System.currentTimeMillis() | |
(1 to n).foreach { _ => { | |
(new SampleIteratorDrop(data.iterator, p)).length | |
}} | |
println(s" Time using drop: ${System.currentTimeMillis() - start}") | |
data = (1 to m).toIndexedSeq | |
println("\nIndexedSeq") | |
start = System.currentTimeMillis() | |
(1 to n).foreach { _ => { | |
data.iterator.filter(_ => Math.random() < p).length | |
}} | |
println(s" Time using filter: ${System.currentTimeMillis() - start}") | |
start = System.currentTimeMillis() | |
(1 to n).foreach { _ => { | |
(new SampleIteratorDrop(data.iterator, p)).length | |
}} | |
println(s" Time using drop: ${System.currentTimeMillis() - start}") | |
data = (1 to m).toList | |
println("\nList") | |
start = System.currentTimeMillis() | |
(1 to n).foreach { _ => { | |
data.iterator.filter(_ => Math.random() < p).length | |
}} | |
println(s" Time using filter: ${System.currentTimeMillis() - start}") | |
start = System.currentTimeMillis() | |
(1 to n).foreach { _ => { | |
(new SampleIteratorDrop(data.iterator, p)).length | |
}} | |
println(s" Time using drop: ${System.currentTimeMillis() - start}") | |
data = (1 to m).toVector | |
println("\nVector") | |
start = System.currentTimeMillis() | |
(1 to n).foreach { _ => { | |
data.iterator.filter(_ => Math.random() < p).length | |
}} | |
println(s" Time using filter: ${System.currentTimeMillis() - start}") | |
start = System.currentTimeMillis() | |
(1 to n).foreach { _ => { | |
(new SampleIteratorDrop(data.iterator, p)).length | |
}} | |
println(s" Time using drop: ${System.currentTimeMillis() - start}") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment