Created
May 8, 2017 18:33
-
-
Save danking/e1ff5d2299f6d4b954f910e8a116e28f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 9cfae509b03a69de267fe58f15e1e902468f3bbf Mon Sep 17 00:00:00 2001 | |
From: Daniel King <[email protected]> | |
Date: Mon, 8 May 2017 11:12:30 -0400 | |
Subject: [PATCH] add timing tests | |
--- | |
.../is/hail/variant/GenericDatasetSuite.scala | 44 ++++++++++++++++++++++ | |
1 file changed, 44 insertions(+) | |
diff --git a/src/test/scala/is/hail/variant/GenericDatasetSuite.scala b/src/test/scala/is/hail/variant/GenericDatasetSuite.scala | |
index 00b9250..9e23057 100644 | |
--- a/src/test/scala/is/hail/variant/GenericDatasetSuite.scala | |
+++ b/src/test/scala/is/hail/variant/GenericDatasetSuite.scala | |
@@ -1,13 +1,57 @@ | |
package is.hail.variant | |
import is.hail.SparkSuite | |
+import is.hail.check.{Gen, Parameters, Prop} | |
import is.hail.check.Prop._ | |
import is.hail.utils._ | |
import is.hail.expr.{TDouble, TGenotype, TInt, TString, TStruct} | |
+import org.apache.commons.math3.random.RandomDataGenerator | |
import org.testng.annotations.Test | |
class GenericDatasetSuite extends SparkSuite { | |
/** Number of property iterations (and therefore timing samples) requested per `timeIt` call. */
private val runs = 10

/**
  * Measures how long it takes to read back datasets drawn from `g`.
  *
  * For every generated dataset this writes it to a fresh temporary path with `write`,
  * then times `read` plus a full pass over the resulting RDD (summing `_._2._2.size`
  * over all rows). No round-trip equality check is performed; only the elapsed time
  * reported by `time` is recorded.
  *
  * @param g     generator of the datasets to benchmark
  * @param write writes a dataset to the given path
  * @param read  loads a dataset from the given path
  * @tparam T    element type of the generated matrices
  * @return `(mean, stddev)` of the recorded times, using the population standard
  *         deviation. Units are whatever `time` reports (presumably nanoseconds --
  *         TODO confirm against `is.hail.utils.time`). Both values are NaN if no
  *         sample was recorded.
  */
private def timeIt[T](g: Gen[VariantSampleMatrix[T]])(
  write: (VariantSampleMatrix[T], String) => Unit,
  read: String => VariantSampleMatrix[T]): (Double, Double) = {
  // Local, append-only sample buffer; avoids re-copying an Array on every `:+`.
  val readTimes = scala.collection.mutable.ArrayBuffer.empty[Long]

  // Seed from Prop.seed so the generated datasets follow the suite-wide seed.
  val rng = new RandomDataGenerator()
  rng.reSeed(Prop.seed)

  forAll(g) { gds =>
    // ".vds" (with the dot) matches the extension used by testReadWrite in this suite.
    // NOTE(review): the write path presumably requires a ".vds" suffix -- confirm.
    val f = tmpDir.createTempFile(extension = ".vds")
    write(gds, f)
    // Force a full scan of the read-back data so the timing covers more than a lazy open.
    val (_, t) = time(read(f).rdd.map(_._2._2.size).fold(0)(_ + _))
    readTimes += t
    true
  }(Parameters(rng, 10000, runs)) // 10000 is presumably the generator size bound -- TODO confirm

  // Divide by the number of samples actually collected rather than the requested count.
  val n = readTimes.length
  val mean = readTimes.sum.toDouble / n
  val stddev = math.sqrt(readTimes.map(x => (x - mean) * (x - mean)).sum / n)

  println(s"mean: $mean, stddev: $stddev")
  (mean, stddev)
}
+ | |
/**
  * Read-time benchmark over three write formats and four read modes each.
  *
  * Runs `timeIt` twelve times: generic datasets via `readGDS`, then realistic
  * variant datasets via `readVDS` written with the default `write`, then the same
  * written with `parquetGenotypes = true`. Each group is read with default options,
  * `sitesOnly`, `sitesOnly` + `samplesOnly`, and `samplesOnly`. The returned
  * statistics are not asserted on; `timeIt` prints them.
  */
@Test def timingTest() {
  // `def`, not `val`: every measurement builds its own generator, as in a direct call.
  def genericMatrices = VariantSampleMatrix.genGeneric(hc)
  def realisticMatrices = VariantSampleMatrix.gen(hc, VSMSubgen.realistic)

  // Generic datasets, default write.
  timeIt(genericMatrices)((ds, path) => ds.write(path), path => hc.readGDS(path))
  timeIt(genericMatrices)((ds, path) => ds.write(path), path => hc.readGDS(path, sitesOnly = true))
  timeIt(genericMatrices)((ds, path) => ds.write(path),
    path => hc.readGDS(path, sitesOnly = true, samplesOnly = true))
  timeIt(genericMatrices)((ds, path) => ds.write(path), path => hc.readGDS(path, samplesOnly = true))

  // Realistic variant datasets, default write.
  timeIt(realisticMatrices)((ds, path) => ds.write(path), path => hc.readVDS(path))
  timeIt(realisticMatrices)((ds, path) => ds.write(path), path => hc.readVDS(path, sitesOnly = true))
  timeIt(realisticMatrices)((ds, path) => ds.write(path),
    path => hc.readVDS(path, sitesOnly = true, samplesOnly = true))
  timeIt(realisticMatrices)((ds, path) => ds.write(path), path => hc.readVDS(path, samplesOnly = true))

  // Realistic variant datasets, written with parquetGenotypes = true.
  timeIt(realisticMatrices)((ds, path) => ds.write(path, parquetGenotypes = true),
    path => hc.readVDS(path))
  timeIt(realisticMatrices)((ds, path) => ds.write(path, parquetGenotypes = true),
    path => hc.readVDS(path, sitesOnly = true))
  timeIt(realisticMatrices)((ds, path) => ds.write(path, parquetGenotypes = true),
    path => hc.readVDS(path, sitesOnly = true, samplesOnly = true))
  timeIt(realisticMatrices)((ds, path) => ds.write(path, parquetGenotypes = true),
    path => hc.readVDS(path, samplesOnly = true))
}
+ | |
@Test def testReadWrite() { | |
val path = tmpDir.createTempFile(extension = ".vds") | |
-- | |
2.10.1 (Apple Git-78) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment