Last active
September 6, 2017 11:14
-
-
Save bigsnarfdude/07a28061b0faf48b6b73 to your computer and use it in GitHub Desktop.
algebird hll serialized via redis repl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.twitter.algebird.HyperLogLog | |
import com.twitter.algebird.HyperLogLogMonoid | |
import com.twitter.algebird.HLL | |
import org.apache.commons.codec.binary.Base64 | |
import com.twitter.chill.algebird.AlgebirdRegistrar | |
import com.twitter.chill.{KryoPool, ScalaKryoInstantiator} | |
// Base64-encoded string holding a serialized 12-bit HLL.
// It was built from: val data = List(1, 1, 2, 2, 3, 3, 4, 4, 5, 5)
val hllHash: String = "AQBjb20udHdpdHRlci5hbGdlYmlyZC5TcGFyc2VITMwRAww9CAGyBQJiBwKUCAJcDQE="
/**
 * Builds a single HLL from a list of integers.
 *
 * Each integer is turned into its own HLL by a 12-bit `HyperLogLogMonoid`, and the
 * per-element HLLs are then summed with that same monoid.
 *
 * @param data the integers to load
 * @return the HLL obtained by summing the per-element HLLs
 */
def loadListInt(data: List[Int]): com.twitter.algebird.HLL = {
  // Brings int2Bytes into scope for the monoid(n) call below
  // (presumably the Int => Array[Byte] conversion it requires).
  import com.twitter.algebird.HyperLogLog.int2Bytes
  val monoid = new HyperLogLogMonoid(12)
  monoid.sum(data.map(n => monoid(n)))
}
// Small sample in which every value from 1 to 5 appears twice.
val data = List(1, 2, 3, 4, 5).flatMap(n => List(n, n))
// Load the sample into an HLL and display its estimated size.
val dataLoadedHLL = loadListInt(data)
dataLoadedHLL.estimatedSize
/**
 * Generates a sequence of pseudo-random integers.
 *
 * Every element is drawn with `scala.util.Random.nextInt(rangeNumber)`, so the values lie
 * in `[0, rangeNumber)`: zero can occur and `rangeNumber` itself is never produced.
 *
 * @param numbersRequired how many numbers to generate; zero or a negative count yields
 *                        an empty sequence
 * @param rangeNumber     exclusive upper bound of the generated values; must be positive
 *                        whenever at least one number is drawn
 * @return `numbersRequired` random integers
 */
def random_numbers(
    numbersRequired: Int,
    rangeNumber: Int
): scala.collection.immutable.IndexedSeq[Int] = {
  // The generator is never reassigned, so it is a val rather than a var.
  val generator = new scala.util.Random
  (1 to numbersRequired).map(_ => generator.nextInt(rangeNumber))
}
// 1,000,000 draws with bound 100000 and 100,000 draws with bound 200000.
val oneMillionRandomNumbers = random_numbers(1000000, 100000).toList
val moarRandomNumbers = random_numbers(100000, 200000).toList
// Exact number of distinct values in the first list.
val unqiueNumbers = oneMillionRandomNumbers.distinct.size
// Despite its name, this is the distinct count of the second list,
// not an overlap between the two lists.
val overlappingNUmbers = moarRandomNumbers.distinct.size
/** Holds the Kryo pool used to serialize and deserialize HLLs in this script. */
object KryoEnv {
  /** Pool size passed to `KryoPool.withByteArrayOutputStream`. */
  val KryoPoolSize = 10

  /** Kryo pool whose instances are created by the factory function below. */
  val kryo = {
    // Factory: a fresh Scala-aware Kryo with reference tracking switched off
    // and the Algebird registrar applied to it.
    val buildKryo = () => {
      val instance = (new ScalaKryoInstantiator).newKryo()
      instance.setReferences(false)
      (new AlgebirdRegistrar).apply(instance)
      instance
    }
    KryoPool.withByteArrayOutputStream(KryoPoolSize, buildKryo)
  }
}

// Top-level alias used by the helper functions in this script.
val kryo = KryoEnv.kryo
/**
 * Turns a Base64 string back into an HLL.
 *
 * The string is Base64-decoded to bytes, the bytes are handed to `kryo.fromBytes`,
 * and the result is cast to `HLL`.
 *
 * @param hllHash Base64 text of a serialized HLL
 * @return the deserialized value, cast to `HLL`
 */
def decodeString(hllHash: String): com.twitter.algebird.HLL =
  kryo.fromBytes(Base64.decodeBase64(hllHash)).asInstanceOf[HLL]
/**
 * Serializes an HLL and returns it as a Base64 string.
 *
 * The HLL is written with `kryo.toBytesWithClass`, the bytes are Base64-encoded, and the
 * encoded bytes are turned into a `String`.
 *
 * @param hll the HLL to serialize
 * @return the Base64 text of the serialized HLL
 */
def encodeString(hll: HLL): String = {
  val serialized: Array[Byte] = kryo.toBytesWithClass(hll)
  // Base64 output is pure ASCII; naming the charset keeps the result independent
  // of the JVM's platform default encoding.
  new String(Base64.encodeBase64(serialized), java.nio.charset.StandardCharsets.US_ASCII)
}
/**
 * Combines two HLLs with the HLL `+` operator.
 *
 * @param nowHLL    the left-hand HLL
 * @param storedHLL the right-hand HLL
 * @return the result of `nowHLL + storedHLL`
 */
def plusHLLs(nowHLL: HLL, storedHLL: HLL): com.twitter.algebird.HLL =
  nowHLL + storedHLL
// Build one HLL per random data set, merge them, and display the merged estimate.
val hll1: HLL = loadListInt(oneMillionRandomNumbers)
val hll2: HLL = loadListInt(moarRandomNumbers)
val combined: HLL = plusHLLs(hll1, hll2)
combined.estimatedSize

// Round trip: decode the stored Base64 HLL, then encode it again.
val hll: HLL = decodeString(hllHash)
val string_hll: String = encodeString(hll)
import com.redis._ | |
import serialization._ | |
import Parse.Implicits._ | |
import Parse.Implicits.parseByteArray | |
// Connect to a Redis server on localhost:6379 and fetch the raw bytes stored under "key".
val r = new RedisClient("localhost", 6379)
val s = r.get[Array[Byte]]("key")

// Decode the bytes with chill's KryoInjection. It is fully qualified because the imports
// visible in this script do not bring KryoInjection into scope.
// A missing key becomes a descriptive Failure rather than a bare Option.get exception.
val tryDecode: scala.util.Try[Any] = s match {
  case Some(bytes) => com.twitter.chill.KryoInjection.invert(bytes)
  case None =>
    scala.util.Failure(new NoSuchElementException("no value found in Redis for \"key\""))
}

// Throws the captured failure if decoding did not succeed; otherwise casts to HLL.
val mmm = tryDecode.get.asInstanceOf[com.twitter.algebird.HLL]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment