Skip to content

Instantly share code, notes, and snippets.

@invkrh
Created June 9, 2017 14:07
Show Gist options
  • Save invkrh/6b5bca9202091fed31191f2db307f2a0 to your computer and use it in GitHub Desktop.
Save invkrh/6b5bca9202091fed31191f2db307f2a0 to your computer and use it in GitHub Desktop.
// Outlier rule used by this script: a value is an outlier when it lies
// below Q1 - 1.5 × IQR or above Q3 + 1.5 × IQR.
// Collect the uplift column and keep it as an ascending-sorted list of doubles.
val yieldUplift = {
  val upliftRows = deltaDF.select("delta_exploit_relative_yield_uplift").collect()
  upliftRows.map(row => row.getDouble(0)).toList.sorted
}
/**
 * Median of a list of doubles. The list is not sorted here; callers are expected
 * to pass it already in ascending order.
 *
 * @param sorted values in ascending order
 * @return the middle element when the size is odd, otherwise the mean of the two
 *         middle elements
 */
def getMedian(sorted: List[Double]): Double = {
  val n = sorted.length
  val mid = n / 2
  if (n % 2 != 0) sorted(mid)
  else (sorted(mid - 1) + sorted(mid)) / 2
}
/**
 * Computes the lower and upper outlier fences of an ascending-sorted sample
 * from its inter-quartile range.
 *
 * The sample is split by position into a lower and an upper half (the middle
 * element belongs to neither half when the size is odd). Q1 and Q3 are the
 * medians of those halves, as computed by `getMedian`.
 *
 * @param sorted sample values, already sorted in ascending order
 * @param factor multiple of the inter-quartile range placed beyond each quartile
 * @return `(Q1 - factor * (Q3 - Q1), Q3 + factor * (Q3 - Q1))`
 * @throws IllegalArgumentException if `sorted` has fewer than two elements
 */
def IQR(sorted: List[Double], factor: Double = 1.5): (Double, Double) = {
  val len = sorted.size
  // With fewer than two values one of the halves would be empty and has no median.
  require(len >= 2, s"IQR needs at least 2 values to form quartiles, got $len")
  val half = len / 2
  // Split by index rather than by comparing values with the median: a value-based
  // split misplaces or drops elements equal to the median (e.g. List(1, 2, 2, 3))
  // and can leave a half empty (e.g. List(2, 2, 2)).
  val low = sorted.take(half)
  // `len - half` skips the middle element when the size is odd.
  val high = sorted.drop(len - half)
  val q1 = getMedian(low)
  val q3 = getMedian(high)
  val range = q3 - q1
  // Honour the caller-supplied factor instead of a hard-coded 1.5.
  (q1 - factor * range, q3 + factor * range)
}
// Outlier fences for the collected uplift values (default factor).
val (lower, upper) = IQR(yieldUplift)
// Domains whose uplift is strictly above the upper fence.
val upperDomains = {
  val aboveUpper = deltaDF.select("domain").filter($"delta_exploit_relative_yield_uplift" > upper)
  aboveUpper.collect().map(row => row.getString(0))
}
// Domains whose uplift is strictly below the lower fence.
val lowerDomains = {
  val belowLower = deltaDF.select("domain").filter($"delta_exploit_relative_yield_uplift" < lower)
  belowLower.collect().map(row => row.getString(0))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment