Last active
August 29, 2015 14:07
-
-
Save miguno/02309debbc4f0754f0a1 to your computer and use it in GitHub Desktop.
Negative example of TopNCMS (a top-N CMS variant) failing to compute heavy hitters correctly
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This unit test will fail because merging top-N based heavy hitters | |
// is not associative; see https://github.com/twitter/algebird/issues/353 | |
"compute heavy hitters correctly (regression test of GH-353)" in { | |
val topN = 2 | |
val monoid = TopNCMS.monoid(EPS, DELTA, SEED, topN) | |
val data1 = Seq(1, 1, 1, 2, 2, 3).toK[K] | |
val data2 = Seq(3, 4, 4, 4, 5, 5).toK[K] | |
val data3 = Seq(3, 6, 6, 6, 7, 7).toK[K] | |
val data4 = Seq(3, 8, 8, 8, 9, 9).toK[K] | |
val singleData = data1 ++ data2 ++ data3 ++ data4 | |
/* | |
Data sets from above shown in tabular view | |
Item 1 2 3 4 total (= singleData) | |
---------------------------------------- | |
A (1) 3 - - - 3 | |
B (2) 2 - - - 2 | |
C (3) 1 1 1 1 4 <<< C is global top 1 heavy hitter | |
D (4) - 3 - - 3 | |
E (5) - 2 - - 2 | |
F (6) - - 3 - 3 | |
G (7) - - 2 - 2 | |
H (8) - - - 3 3 | |
I (9) - - - 2 2 | |
*/ | |
val cms1 = monoid.create(data1) | |
val cms2 = monoid.create(data2) | |
val cms3 = monoid.create(data3) | |
val cms4 = monoid.create(data4) | |
val aggregated = cms1 ++ cms2 ++ cms3 ++ cms4 | |
val single = monoid.create(singleData) | |
// The two asserts below will both fail | |
aggregated.heavyHitters should be(single.heavyHitters) | |
aggregated.heavyHitters contains(3.toK[K]) // C=3 is global top 1 heavy hitter | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment