Skip to content

Instantly share code, notes, and snippets.

@danking
Created May 8, 2017 18:33
Show Gist options
  • Save danking/e1ff5d2299f6d4b954f910e8a116e28f to your computer and use it in GitHub Desktop.
Save danking/e1ff5d2299f6d4b954f910e8a116e28f to your computer and use it in GitHub Desktop.
From 9cfae509b03a69de267fe58f15e1e902468f3bbf Mon Sep 17 00:00:00 2001
From: Daniel King <[email protected]>
Date: Mon, 8 May 2017 11:12:30 -0400
Subject: [PATCH] add timing tests
---
.../is/hail/variant/GenericDatasetSuite.scala | 44 ++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/src/test/scala/is/hail/variant/GenericDatasetSuite.scala b/src/test/scala/is/hail/variant/GenericDatasetSuite.scala
index 00b9250..9e23057 100644
--- a/src/test/scala/is/hail/variant/GenericDatasetSuite.scala
+++ b/src/test/scala/is/hail/variant/GenericDatasetSuite.scala
@@ -1,13 +1,57 @@
package is.hail.variant
import is.hail.SparkSuite
+import is.hail.check.{Gen, Parameters, Prop}
import is.hail.check.Prop._
import is.hail.utils._
import is.hail.expr.{TDouble, TGenotype, TInt, TString, TStruct}
+import org.apache.commons.math3.random.RandomDataGenerator
import org.testng.annotations.Test
class GenericDatasetSuite extends SparkSuite {
+  /** Number of generated datasets timed per `timeIt` call; passed as the `Parameters` count. */
+  private val runs = 10
+
+  /**
+    * Times how long it takes to read back datasets produced by `g`.
+    *
+    * For each generated dataset: write it to a fresh temp path with `write`, then time
+    * `read` on that path together with a sum of `_._2._2.size` over the resulting `rdd`.
+    * The sum itself is discarded; presumably it is there to force the read to be evaluated
+    * inside the timed region. The write is not timed.
+    *
+    * The summary is also printed to stdout as "mean: ..., stddev: ...".
+    *
+    * @param g     generator of the datasets to write and read back
+    * @param write writes a dataset to the given path
+    * @param read  reads a dataset from the given path
+    * @return (mean, population standard deviation) of the collected read timings, in whatever
+    *         unit `time` reports (presumably nanoseconds -- TODO confirm against its definition)
+    */
+  private def timeIt[T](g: Gen[VariantSampleMatrix[T]])(
+    write: (VariantSampleMatrix[T], String) => Unit,
+    read: String => VariantSampleMatrix[T]): (Double, Double) = {
+    // Filled from inside the property body below, hence a local mutable buffer.
+    val readTimes = scala.collection.mutable.ArrayBuffer.empty[Long]
+
+    // Explicitly seed the generator RNG from Prop.seed instead of leaving it unseeded.
+    val rng = new RandomDataGenerator()
+    rng.reSeed(Prop.seed)
+
+    // Applying the property to Parameters runs it; the body always returns true because the
+    // property is only used as a driver for generating `runs` datasets, not as a check.
+    forAll(g) { gds =>
+      // ".vds" (with the dot) matches the temp paths used by testReadWrite in this suite.
+      val f = tmpDir.createTempFile(extension = ".vds")
+      write(gds, f)
+      val (_, elapsed) = time(read(f).rdd.map(_._2._2.size).fold(0)(_ + _))
+      readTimes += elapsed
+      true
+    }(Parameters(rng, 10000, runs))
+
+    // Divide by the number of samples actually collected rather than the configured count.
+    val n = readTimes.length
+    val mean = readTimes.sum.toDouble / n
+    val stddev = math.sqrt(readTimes.map(x => (x - mean) * (x - mean)).sum / n)
+
+    println(s"mean: $mean, stddev: $stddev")
+    (mean, stddev)
+  }
+
+  /**
+    * Runs `timeIt` for every read mode (full, sitesOnly, sitesOnly + samplesOnly, samplesOnly)
+    * over three kinds of written data: generic datasets read with `hc.readGDS`, and
+    * `VSMSubgen.realistic` datasets read with `hc.readVDS` after being written with the default
+    * `write` and with `parquetGenotypes = true`.
+    *
+    * Nothing is asserted on the timings. Each configuration is labelled on stdout so that the
+    * "mean: ..., stddev: ..." line printed by `timeIt` can be attributed to it.
+    */
+  @Test def timingTest() {
+    // `def`, not `val`: the generator expression is re-evaluated for every run, exactly as if
+    // it were written out at each call site.
+    def generic = VariantSampleMatrix.genGeneric(hc)
+    def realistic = VariantSampleMatrix.gen(hc, VSMSubgen.realistic)
+
+    // Prints the label first, then evaluates the by-name timing (which prints its own summary).
+    def labelled(label: String)(timing: => (Double, Double)): Unit = {
+      println(s"timing: $label")
+      timing
+    }
+
+    labelled("GDS, full read")(
+      timeIt(generic)(_.write(_), hc.readGDS(_)))
+    labelled("GDS, sitesOnly")(
+      timeIt(generic)(_.write(_), hc.readGDS(_, sitesOnly = true)))
+    labelled("GDS, sitesOnly + samplesOnly")(
+      timeIt(generic)(_.write(_), hc.readGDS(_, sitesOnly = true, samplesOnly = true)))
+    labelled("GDS, samplesOnly")(
+      timeIt(generic)(_.write(_), hc.readGDS(_, samplesOnly = true)))
+
+    labelled("VDS, full read")(
+      timeIt(realistic)(_.write(_), hc.readVDS(_)))
+    labelled("VDS, sitesOnly")(
+      timeIt(realistic)(_.write(_), hc.readVDS(_, sitesOnly = true)))
+    labelled("VDS, sitesOnly + samplesOnly")(
+      timeIt(realistic)(_.write(_), hc.readVDS(_, sitesOnly = true, samplesOnly = true)))
+    labelled("VDS, samplesOnly")(
+      timeIt(realistic)(_.write(_), hc.readVDS(_, samplesOnly = true)))
+
+    labelled("VDS (parquetGenotypes), full read")(
+      timeIt(realistic)(_.write(_, parquetGenotypes = true), hc.readVDS(_)))
+    labelled("VDS (parquetGenotypes), sitesOnly")(
+      timeIt(realistic)(_.write(_, parquetGenotypes = true), hc.readVDS(_, sitesOnly = true)))
+    labelled("VDS (parquetGenotypes), sitesOnly + samplesOnly")(
+      timeIt(realistic)(_.write(_, parquetGenotypes = true), hc.readVDS(_, sitesOnly = true, samplesOnly = true)))
+    labelled("VDS (parquetGenotypes), samplesOnly")(
+      timeIt(realistic)(_.write(_, parquetGenotypes = true), hc.readVDS(_, samplesOnly = true)))
+  }
+
@Test def testReadWrite() {
val path = tmpDir.createTempFile(extension = ".vds")
--
2.10.1 (Apple Git-78)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment