Skip to content

Instantly share code, notes, and snippets.

@johnynek
Created May 7, 2015 01:50
Show Gist options
  • Save johnynek/699f2e10a0ad55e2a069 to your computer and use it in GitHub Desktop.
Save johnynek/699f2e10a0ad55e2a069 to your computer and use it in GitHub Desktop.
simple streaming median
import scala.collection.immutable.Queue
object Median {
sealed trait State {
def count: Long
def add(item: Long): State
def median: Long
}
case class Start(window: Queue[Long], max: Int, countInt: Int) extends State {
def count = countInt
def add(item: Long) = if (count == max) {
val swin = (window :+ item).toList.sorted
val sampleMedian = swin((countInt + 1)/2)
// here is the current gap:
val error = (swin((countInt + 1)/2 + 1) - swin((countInt + 1)/2 - 1))/2
End(swin((countInt + 1)/2), error, countInt + 1)
}
else Start(window :+ item, max, countInt + 1)
def median = window.toList.sorted.apply(countInt/2)
}
case class End(median: Long, step: Long, count: Long) extends State {
def nextStep: Long = math.max(step - step/count, 1.0).toLong
def add(item: Long) =
if (item > median) End(median + step, nextStep, count + 1L)
else if (item < median) End(median - step, nextStep, count + 1L)
else End(median, nextStep, count + 1L)
}
def init(max: Int): State = Start(Queue.empty, max, 0)
}
import java.util.Random
val rng = new Random
val size = 1000000
val data = (0 until size).map { _ => rng.nextInt(100000).toLong }
def sample: Long =
util.Random.shuffle(data).foldLeft(Median.init(1000)) { (s, v) => s.add(v) }.median
val trueM = data.toList.sorted.apply(size/2)
println("true: " + trueM)
val trials = 20
val results = (0 until trials).map(_ => sample)
println(results)
println(math.sqrt(results.map { e => (e - trueM).toDouble/trueM }.map(x => x*x).sum/trials))
/**
scala median.scala
true: 50017
Vector(48553, 48812, 48938, 48912, 48326, 52850, 51291, 48164, 50267, 50026, 49546, 50762, 50240, 51225, 47295, 51018, 50791, 49397, 53490, 49049)
0.030539443429986924
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment