miguno miguno

See the discussion in Algebird pull request #354.

> cappi::benchmarkOnly com.twitter.algebird.caliper.CMSBenchmark

Summary

The benchmark results indicate that @specialized(Int, Long) K does not improve the CMS[K] performance for Long.

Before @specialized, for Long:

	hduser@ubuntu1:/usr/local/hadoop/conf$ cat core-site.xml
	<?xml version="1.0"?>
	<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

	<!-- Put site-specific property overrides in this file. -->

	<configuration>
	<property>
	<name>hadoop.tmp.dir</name>
	<value>/app/hadoop/tmp</value>

	name := "kafka-0.8-with-scala-2.10-example"

	version := "1.0"

	scalaVersion := "2.10.3"

	sbtVersion := "0.13.0"

	// Required to retrieve Kafka 0.8.0-rc5 artifacts; once Kafka 0.8.0 is officially released
	// you don't need to use the Apache Staging repo anymore.

	mode = 'django'
	bind = '127.0.0.1:8081'
	pythonpath = '/usr/lib/python2.6/site-packages/graphite'
	preload_app = True
	workers = 2
	timeout = 30
	django_settings = 'settings'

	import java.io.IOException;
	import java.util.Properties;
	import kafka.server.KafkaConfig;
	import kafka.server.KafkaServerStartable;


	public class KafkaLocal {

	public KafkaServerStartable kafka;
	public ZooKeeperLocal zookeeper;

	/**
	* @author Danny Miller [email protected]
	*/

	import java.util.Collection;
	import java.util.Iterator;
	import java.util.LinkedList;
	import java.util.List;

	import backtype.storm.task.OutputCollector;

	// Copyright 2014 BrightTag, Inc. All rights reserved.
	package com.brighttag.storm.utils;

	import backtype.storm.tuple.Values;
	import storm.trident.operation.BaseFunction;
	import storm.trident.operation.TridentCollector;
	import storm.trident.tuple.TridentTuple;

	/**
	* Converts the first tuple from a byte array into a string.

	package com.miguno.algebird.extensions

	import com.twitter.algebird.{Approximate, Monoid, MonoidAggregator}

	import scala.collection.immutable.SortedSet

	class GenCountMinSketchMonoid[K: Ordering : GenCMSHasher](eps: Double, delta: Double, seed: Int,
	heavyHittersPct: Double = 0.01) extends Monoid[GenCMS[K]] {

	assert(0 < eps && eps < 1, "eps must lie in (0, 1)")

	package com.twitter.algebird.caliper

	import com.google.caliper.api.Macrobenchmark
	import com.google.caliper.{Param, Benchmark}
	import com.twitter.algebird.CountMinSketchMonoid

	/**
	* Run via [[com.twitter.algebird.caliper.Runner]], using the following CLI options:
	*
	* "--time-limit 90s --instrument macro com.twitter.algebird.caliper.CMSBenchmark"

	// This unit test will fail because merging top-N based heavy hitters
	// is not associative; see https://github.com/twitter/algebird/issues/353
	"compute heavy hitters correctly (regression test of GH-353)" in {
	val topN = 2
	val monoid = TopNCMS.monoid(EPS, DELTA, SEED, topN)

	val data1 = Seq(1, 1, 1, 2, 2, 3).toK[K]
	val data2 = Seq(3, 4, 4, 4, 5, 5).toK[K]
	val data3 = Seq(3, 6, 6, 6, 7, 7).toK[K]
	val data4 = Seq(3, 8, 8, 8, 9, 9).toK[K]