Last active
July 16, 2019 09:33
-
-
Save sbatururimi/65f7b48c45953d029bd017dafbef63f3 to your computer and use it in GitHub Desktop.
Very fast hashing, murmur3+limit to wanted number of bucket(check the last line)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
NUMBER_BUCKETS = 600 | |
def hash_bucket(data, num_buckets=NUMBER_BUCKETS, seed=42): | |
"""MurmurHash3 was written by Austin Appleby, and is placed in the | |
public domain. The author hereby disclaims copyright to this source | |
code.""" | |
c1 = 0xcc9e2d51 | |
c2 = 0x1b873593 | |
length = len(data) | |
h1 = seed | |
roundedEnd = (length & 0xfffffffc) # round down to 4 byte block | |
for i in range(0, roundedEnd, 4): | |
# little endian load order | |
k1 = (ord(data[i]) & 0xff) | ((ord(data[i + 1]) & 0xff) << 8) | \ | |
((ord(data[i + 2]) & 0xff) << 16) | (ord(data[i + 3]) << 24) | |
k1 *= c1 | |
k1 = (k1 << 15) | ((k1 & 0xffffffff) >> 17) # ROTL32(k1,15) | |
k1 *= c2 | |
h1 ^= k1 | |
h1 = (h1 << 13) | ((h1 & 0xffffffff) >> 19) # ROTL32(h1,13) | |
h1 = h1 * 5 + 0xe6546b64 | |
# tail | |
k1 = 0 | |
val = length & 0x03 | |
if val == 3: | |
k1 = (ord(data[roundedEnd + 2]) & 0xff) << 16 | |
# fallthrough | |
if val in [2, 3]: | |
k1 |= (ord(data[roundedEnd + 1]) & 0xff) << 8 | |
# fallthrough | |
if val in [1, 2, 3]: | |
k1 |= ord(data[roundedEnd]) & 0xff | |
k1 *= c1 | |
k1 = (k1 << 15) | ((k1 & 0xffffffff) >> 17) # ROTL32(k1,15) | |
k1 *= c2 | |
h1 ^= k1 | |
# finalization | |
h1 ^= length | |
# fmix(h1) | |
h1 ^= ((h1 & 0xffffffff) >> 16) | |
h1 *= 0x85ebca6b | |
h1 ^= ((h1 & 0xffffffff) >> 13) | |
h1 *= 0xc2b2ae35 | |
h1 ^= ((h1 & 0xffffffff) >> 16) | |
_hash = h1 & 0xffffffff | |
# limit now to the number of buckets | |
bucket = _hash % num_bucket | |
return bucket |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment