Skip to content

Instantly share code, notes, and snippets.

@ldacosta
Created December 16, 2016 20:45
Show Gist options
  • Save ldacosta/b475008959a1009a0ede64469528ce45 to your computer and use it in GitHub Desktop.
Save ldacosta/b475008959a1009a0ede64469528ce45 to your computer and use it in GitHub Desktop.
package com.mediative.mpn.brain.datascience.platform
package nationals.recommendations.strategy
import com.cra.figaro.library.atomic.continuous.{ AtomicNormal, Normal }
import com.holdenkarau.spark.testing.{ DatasetGenerator, DatasetSuiteBase }
import com.mediative.mpn.brain.datascience.platform.nationals.generators
import org.apache.spark.sql.Dataset
import org.scalacheck.Gen
import org.scalatest.FreeSpec
import org.scalatest.prop.GeneratorDrivenPropertyChecks
case class MyCC(mean: Double, standardDev: Double) {
val probabilityD: AtomicNormal = Normal(mean, math.pow(standardDev, 2))
// 95% interval:
val interval95: (Double, Double) =
(probabilityD.mean - 2 * probabilityD.standardDeviation, probabilityD.mean + 2 * probabilityD.standardDeviation)
}
object AtomicNormalTest {
val MyCCGen: Gen[MyCC] = for {
m <- Gen.choose(1.0, 10.0)
s <- Gen.choose(1.0, 10.0)
} yield MyCC(mean = m, standardDev = s)
}
import com.mediative.mpn.brain.datascience.platform.nationals.recommendations.strategy.AtomicNormalTest._
class AtomicNormalTest extends FreeSpec with DatasetSuiteBase with GeneratorDrivenPropertyChecks {
implicit override val generatorDrivenConfig = PropertyCheckConfig(minSuccessful = 5)
"test " - {
"direct sampling" in {
forAll(MyCCGen) { cc =>
val (iMin, iMax) = cc.interval95
assert(iMin + iMax >= iMin) // whatever condition. I just need no-one to be lazy.
}
}
"dataset sampling" in {
val myCCDatasetGen: Gen[Dataset[MyCC]] = {
import spark.implicits._
DatasetGenerator
.genDataset[MyCC](sqlContext)(MyCCGen)
.suchThat(_.count > 0)
}
forAll(myCCDatasetGen) { aSet =>
// aSet.show()
assert(aSet.count() >= 0)
}
}
}
}
@ldacosta
Copy link
Author

The test called "direct sampling" never fails. The test called "dataset sampling" fails occasionally (ie, not consistently) with a stack trace that looks like the following:

org.scalatest.exceptions.GeneratorDrivenPropertyCheckFailedException: SparkException was thrown during property evaluation. Message: Job aborted due to stage failure: Task 15 in stage 6.0 failed 1 times, most recent failure: Lost task 15.0 in stage 6.0 (TID 84, localhost): java.lang.ArrayIndexOutOfBoundsException: 259 at com.cra.figaro.util.HashSelectableSet.contains(SelectableSet.scala:142) at scala.collection.mutable.SetLike$class.add(SetLike.scala:81) at com.cra.figaro.util.HashSelectableSet.add(SelectableSet.scala:37) at com.cra.figaro.language.Universe.activate(Universe.scala:175) at com.cra.figaro.language.Element.<init>(Element.scala:480) at com.cra.figaro.library.atomic.continuous.AtomicNormal.<init>(Normal.scala:24) at com.cra.figaro.library.atomic.continuous.Normal$.apply(Normal.scala:140) at com.mediative.mpn.brain.datascience.platform.nationals.recommendations.strategy.MyCC.<init>(AtomicNormalTest.scala:13) at com.mediative.mpn.brain.datascience.platform.nationals.recommendations.strategy.AtomicNormalTest$$anonfun$2$$anonfun$apply$1.apply(AtomicNormalTest.scala:24) at com.mediative.mpn.brain.datascience.platform.nationals.recommendations.strategy.AtomicNormalTest$$anonfun$2$$anonfun$apply$1.apply(AtomicNormalTest.scala:23) at scala.Option.map(Option.scala:146)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment