Skip to content

Instantly share code, notes, and snippets.

@erikerlandson
Created January 26, 2018 21:35
Show Gist options
  • Save erikerlandson/6f48fcf8dcc02cbcb6ed2ac066c4c813 to your computer and use it in GitHub Desktop.
Save erikerlandson/6f48fcf8dcc02cbcb6ed2ac066c4c813 to your computer and use it in GitHub Desktop.
pseudocode for a Scala topk-monoid built on count-min-sketch monoid
// type parameter V is type of object values being counted
// class parameters are 'val'; this class is immutable
class TopK[V](
val k: Int,
val cms: CountMinSketch[V],
val topk: immutable.Map[V, Int],
val fmin: Int) {
// update the TopK sketch w/ a new element 'v'
def +(v: V): TopK[V] = {
val ucms = this.cms + v
val vf = ucms.frequency(v)
val (utopk, ufmin) = if (topk.size < k) {
(topk + (v, vf), math.min(vf, fmin))
} else if (vf <= fmin) (topk, fmin) else {
val del = map.minBy { case (_, f) => f }
((topk - del._1) + (v, vf), map.values.min)
}
new TopK[V](k, ucms, utopk, ufmin)
}
// combine two TopK sketches, monoidally
def ++(that: TopK[V]): TopK[V] = {
val ucms = this.cms ++ that.cms
val vu = this.topk.keys.toSet ++ that.topk.keys.toSet
val (utopk, ufmin) = vu.foldLeft((Map.empty[V, Int], 0)) { case (v, (tk, fm)) =>
val vf = ucms.frequency(v)
if (tk.size < k) {
(tk + (v, vf), math.min(vf, fm))
} else if (vf <= fm) (tk, fm) else {
val del = map.minBy { case (_, f) => f }
((tk - del._1) + (v, vf), map.values.min)
}
}
new TopK[K](k, ucms, utopk, ufmin)
}
}
object TopK {
def empty[V](k: Int) = new TopK[V](k, CountMinSketch.empty[V], immutable.Map.empty[V, Int], 0)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment