Skip to content

Instantly share code, notes, and snippets.

@bigsnarfdude
Last active September 6, 2017 11:14
Show Gist options
  • Save bigsnarfdude/07a28061b0faf48b6b73 to your computer and use it in GitHub Desktop.
Save bigsnarfdude/07a28061b0faf48b6b73 to your computer and use it in GitHub Desktop.
algebird hll serialized via redis repl
import com.twitter.algebird.HyperLogLog
import com.twitter.algebird.HyperLogLogMonoid
import com.twitter.algebird.HLL
import org.apache.commons.codec.binary.Base64
import com.twitter.chill.algebird.AlgebirdRegistrar
import com.twitter.chill.{KryoPool, ScalaKryoInstantiator}
// Base64 text of a serialized 12-bit HLL.
// Presumably produced from: val data = List(1, 1, 2, 2, 3, 3, 4, 4, 5, 5)
val hllHash: String = "AQBjb20udHdpdHRlci5hbGdlYmlyZC5TcGFyc2VITMwRAww9CAGyBQJiBwKUCAJcDQE="
/**
 * Builds a HyperLogLog sketch from a list of integers.
 *
 * Every element is turned into its own HLL by the monoid, and the
 * per-element sketches are then summed into a single one.
 *
 * @param data values to add to the sketch; duplicates are allowed
 * @param bits size parameter handed to `HyperLogLogMonoid`; defaults to 12,
 *             the value that was previously hard-coded here
 * @return the monoid sum of the per-element sketches
 */
def loadListInt(data: List[Int], bits: Int = 12): com.twitter.algebird.HLL = {
  // Brings the Int => Array[Byte] conversion into scope for `monoid(_)`.
  import com.twitter.algebird.HyperLogLog.int2Bytes
  val monoid = new HyperLogLogMonoid(bits)
  monoid.sum(data.map { monoid(_) })
}
// Sample input: each value from 1 to 5 appears twice, i.e. List(1, 1, 2, 2, ..., 5, 5).
val data = List(1, 2, 3, 4, 5).flatMap(n => List(n, n))
val dataLoadedHLL = loadListInt(data)
// Bare expression so the REPL shows the estimate.
dataLoadedHLL.estimatedSize
/**
 * Generates a sequence of pseudo-random integers.
 *
 * @param numbersRequired how many values to produce; zero or a negative
 *                        count yields an empty sequence
 * @param rangeNumber     exclusive upper bound: every value lies in
 *                        `0 until rangeNumber`
 * @return `numbersRequired` values, each drawn with `nextInt(rangeNumber)`
 */
def random_numbers(numbersRequired: Int, rangeNumber: Int): scala.collection.immutable.IndexedSeq[Int] = {
  // The generator is never reassigned, so a val is sufficient.
  val generator = new scala.util.Random
  (1 to numbersRequired).map(_ => generator.nextInt(rangeNumber))
}
// 1,000,000 draws below 100000 and 100,000 draws below 200000.
val oneMillionRandomNumbers = random_numbers(numbersRequired = 1000000, rangeNumber = 100000).toList
val moarRandomNumbers = random_numbers(numbersRequired = 100000, rangeNumber = 200000).toList
// Exact distinct counts of each sample. Note that overlappingNUmbers counts the
// distinct values of the second sample, not an overlap between the two samples.
val unqiueNumbers = oneMillionRandomNumbers.distinct.size
val overlappingNUmbers = moarRandomNumbers.distinct.size
/**
 * Holds the shared Kryo pool used for (de)serializing values.
 *
 * Each Kryo instance is created by `ScalaKryoInstantiator`, has
 * `setReferences(false)` applied, and gets `AlgebirdRegistrar` applied to it.
 */
object KryoEnv {
  // Size passed to the pool factory below.
  val KryoPoolSize = 10

  val kryo: KryoPool = {
    // Factory invoked by the pool whenever it needs a new Kryo instance.
    val instantiate = () => {
      val k = (new ScalaKryoInstantiator).newKryo()
      k.setReferences(false)
      (new AlgebirdRegistrar).apply(k)
      k
    }
    KryoPool.withByteArrayOutputStream(KryoPoolSize, instantiate)
  }
}
// Top-level alias for the shared pool so the helpers below can refer to it as `kryo`.
val kryo: KryoPool = KryoEnv.kryo
/**
 * Turns a Base64 string back into an HLL.
 *
 * The text is Base64-decoded into bytes, the bytes are read with
 * `kryo.fromBytes`, and the result is cast to `HLL`.
 *
 * @param hllHash Base64 text of the serialized HLL
 * @return the deserialized object, cast to `HLL`
 */
def decodeString(hllHash: String): com.twitter.algebird.HLL =
  kryo.fromBytes(Base64.decodeBase64(hllHash)).asInstanceOf[HLL]
/**
 * Serializes an HLL and returns it as Base64 text.
 *
 * @param hll the sketch to serialize
 * @return Base64 encoding of the bytes produced by `kryo.toBytesWithClass`
 */
def encodeString(hll: HLL): String = {
  val serialized: Array[Byte] = kryo.toBytesWithClass(hll)
  // Base64 output is plain ASCII; naming the charset keeps the resulting
  // String independent of the JVM's default charset.
  new String(Base64.encodeBase64(serialized), java.nio.charset.StandardCharsets.US_ASCII)
}
/**
 * Combines two HLLs with the `+` operator.
 *
 * @param nowHLL    left operand
 * @param storedHLL right operand
 * @return `nowHLL + storedHLL`
 */
def plusHLLs(nowHLL: HLL, storedHLL: HLL): com.twitter.algebird.HLL =
  nowHLL + storedHLL
// Sketch both random samples, merge them, and show the merged estimate.
val hll1: HLL = loadListInt(oneMillionRandomNumbers)
val hll2: HLL = loadListInt(moarRandomNumbers)
val combined: HLL = plusHLLs(nowHLL = hll1, storedHLL = hll2)
combined.estimatedSize
// Round trip: decode the stored Base64 text, then encode the result again.
val hll: HLL = decodeString(hllHash)
val string_hll: String = encodeString(hll)
import com.redis._
import serialization._
import Parse.Implicits._
import Parse.Implicits.parseByteArray
// Read raw bytes stored under "key" from a local Redis and decode them into an HLL.
val r = new RedisClient("localhost", 6379)
val s = r.get[Array[Byte]]("key")
// KryoInjection is fully qualified because none of the imports above bring it
// into scope. A missing key becomes a descriptive Failure instead of a bare
// `None.get` error.
val tryDecode: scala.util.Try[Any] = s match {
  case Some(bytes) => com.twitter.chill.KryoInjection.invert(bytes)
  case None =>
    scala.util.Failure(new NoSuchElementException("Redis has no value stored under \"key\""))
}
// Still throws if decoding failed or the decoded value is not an HLL.
val mmm = tryDecode.get.asInstanceOf[com.twitter.algebird.HLL]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment