Last active
September 15, 2017 02:35
-
-
Save manuzhang/052e83dfda44f6f34678720bb4b93e75 to your computer and use it in GitHub Desktop.
Encode with Murmur Hash
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.nio.{ByteBuffer, ByteOrder}
// "com.google.guava" % "guava" % "16.0.1"
import com.google.common.hash.Hashing
/**
 * Encodes `(prefix, value)` pairs into a non-negative 60-bit key using
 * Guava's 128-bit MurmurHash3 with a fixed seed.
 *
 * Every field is serialized in little-endian byte order before hashing, and
 * only the low 60 bits of the first 64-bit half of the digest are kept.
 */
object MurmurHash {

  /** Seed passed to Guava's `murmur3_128`. */
  private val seed = 0x3c074a61

  /** Serialized width of an `Int` field, in bytes. */
  private val IntBytes = 4

  /** Serialized width of a `Long` field, in bytes. */
  private val LongBytes = 8

  /** Selects the low 60 bits of a `Long`, i.e. the range `[0, 2^60)`. */
  private val ResultMask = (1L << 60) - 1

  /**
   * Hashes a single `(prefix, value)` pair.
   *
   * @param prefix 32-bit tag written first, little-endian
   * @param value  64-bit payload written second, little-endian
   * @return a hash in the range `[0, 2^60)`
   */
  def encode(prefix: Int, value: Long): Long = {
    // Write both fields into one pre-sized buffer rather than concatenating
    // per-field arrays; the resulting byte sequence is the same.
    val bytes = ByteBuffer
      .allocate(IntBytes + LongBytes)
      .order(ByteOrder.LITTLE_ENDIAN)
      .putInt(prefix)
      .putLong(value)
      .array()
    encodeBytes(bytes)
  }

  /**
   * Hashes two `(prefix, value)` pairs as one 24-byte message laid out as
   * `prefix1, value1, prefix2, value2`, each field little-endian.
   *
   * @param prefix1 32-bit tag of the first pair
   * @param value1  64-bit payload of the first pair
   * @param prefix2 32-bit tag of the second pair
   * @param value2  64-bit payload of the second pair
   * @return a hash in the range `[0, 2^60)`
   */
  def encode(prefix1: Int, value1: Long, prefix2: Int, value2: Long): Long = {
    val bytes = ByteBuffer
      .allocate(2 * (IntBytes + LongBytes))
      .order(ByteOrder.LITTLE_ENDIAN)
      .putInt(prefix1)
      .putLong(value1)
      .putInt(prefix2)
      .putLong(value2)
      .array()
    encodeBytes(bytes)
  }

  /**
   * Hashes `bytes` with seeded `murmur3_128` and folds the digest to 60 bits.
   *
   * @param bytes the message to hash
   * @return the low 60 bits of the first 64-bit half of the digest
   */
  private def encodeBytes(bytes: Array[Byte]): Long = {
    // The guava version is the little-endian variant of the original cpp
    // version. `asLong()` reads the first eight digest bytes in little-endian
    // order, which is the same value as reversing the 16-byte digest and
    // reading its last eight bytes big-endian.
    // The hash value is 128 bit while we only need 64.
    val firstHalf = Hashing.murmur3_128(seed).hashBytes(bytes).asLong()
    // For a power-of-two modulus, masking the low bits equals the
    // non-negative remainder ((x % m) + m) % m.
    firstHalf & ResultMask
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment