Last active
June 5, 2025 22:59
-
-
Save mreschke/099154a9c064d9b706e1beedd983c254 to your computer and use it in GitHub Desktop.
Thread Load Balancing (murmur vs int)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
from murmurhash2 import murmurhash2, murmurhash3 | |
# pip install murmurhash2 | |
def murmur_hash_2(data, seed=0):
    """Return an unsigned 32-bit Murmur hash of ``data``.

    NOTE: despite the historical name, the constants and mixing steps used
    here are those of MurmurHash3 (x86, 32-bit variant), so the result
    matches the published MurmurHash3_x86_32 test vectors.

    Python integers never overflow, so every multiply, add and rotate is
    explicitly truncated to 32 bits; without that truncation the "rotations"
    are not rotations and the value grows without bound.

    Args:
        data: Bytes-like object to hash (e.g. ``"text".encode()``).
        seed: Integer seed; only its low 32 bits are used.

    Returns:
        The hash as an int in the range ``0 .. 0xFFFFFFFF``.
    """
    mask = 0xFFFFFFFF

    def rotl32(value, shift):
        # ``value`` must already fit in 32 bits for the right shift to
        # bring back exactly the bits that the left shift pushes out.
        return ((value << shift) | (value >> (32 - shift))) & mask

    c1 = 0xcc9e2d51
    c2 = 0x1b873593

    length = len(data)
    nblocks = length // 4
    h1 = seed & mask

    # Body: mix in the input one 4-byte little-endian word at a time.
    # '<I' pins the byte order so the digest is the same on every host.
    for i in range(nblocks):
        k1 = struct.unpack_from('<I', data, i * 4)[0]
        k1 = (k1 * c1) & mask
        k1 = (rotl32(k1, 15) * c2) & mask
        h1 ^= k1
        h1 = (rotl32(h1, 13) * 5 + 0xe6546b64) & mask

    # Tail: fold the 0-3 leftover bytes into one partial word.
    tail_index = nblocks * 4
    k1 = 0
    for i in range(length % 4):
        k1 ^= data[tail_index + i] << (i * 8)
    # With no leftover bytes k1 stays 0 and the XOR below is a no-op.
    k1 = (k1 * c1) & mask
    k1 = (rotl32(k1, 15) * c2) & mask
    h1 ^= k1

    # Finalization: force every input bit to affect every output bit.
    h1 ^= length & mask
    h1 ^= h1 >> 16
    h1 = (h1 * 0x85ebca6b) & mask
    h1 ^= h1 >> 13
    h1 = (h1 * 0xc2b2ae35) & mask
    h1 ^= h1 >> 16
    return h1
def no_hash(client_id):
    """Pick a 1-based thread number for ``client_id`` using modulo only.

    Reads the module-level ``max_threads`` and prints the assignment before
    returning it.
    """
    # Skipping the hash is fine here because the client ids are already
    # integers; hashing is only needed to turn strings into integers first.
    slot = 1 + abs(client_id % max_threads)
    print(client_id, 'thread', slot)
    return slot
def python_murmer(client_id):
    """Pick a 1-based thread number via the pure-Python ``murmur_hash_2``.

    The hashed key is the text ``ClientID:<client_id>``. Reads the
    module-level ``seed`` and ``max_threads`` and prints the key, the hash
    and the chosen thread before returning the thread number.
    """
    key = f'ClientID:{client_id!s}'
    digest = murmur_hash_2(key.encode(), seed)
    slot = 1 + abs(digest % max_threads)
    print(key, 'murmer', digest, 'thread', slot)
    return slot
def rust_murmer2(client_id):
    """Pick a 1-based thread number via ``murmurhash2`` from the
    ``murmurhash2`` package.

    The hashed key is the text ``ClientID:<client_id>``. Reads the
    module-level ``seed`` and ``max_threads`` and prints the key, the hash
    and the chosen thread before returning the thread number.
    """
    key = f'ClientID:{client_id!s}'
    payload = key.encode()
    digest = murmurhash2(payload, seed)
    slot = 1 + abs(digest % max_threads)
    print(key, 'murmer', digest, 'thread', slot)
    return slot
def rust_murmer3(client_id):
    """Pick a 1-based thread number via ``murmurhash3`` from the
    ``murmurhash2`` package.

    The hashed key is the text ``ClientID:<client_id>``. Reads the
    module-level ``seed`` and ``max_threads`` and prints the key, the hash
    and the chosen thread before returning the thread number.
    """
    key = f'ClientID:{client_id!s}'
    payload = key.encode()
    digest = murmurhash3(payload, seed)
    slot = 1 + abs(digest % max_threads)
    print(key, 'murmer', digest, 'thread', slot)
    return slot
# Parameters (read as globals by the balancing functions above)
max_threads = 4
seed = 2016458
balance = {}  # thread number -> list of client_ids assigned to it

# Fake client_ids: push a contiguous range of ids through one strategy.
# Swap in one of the commented-out calls to compare strategies; the
# "thread count" pairs under each call were noted from earlier runs.
for client_id in range(1000, 5353):
    thread = python_murmer(client_id)
    # 1 1094
    # 4 1138
    # 2 1086
    # 3 1035

    #thread = rust_murmer2(client_id)
    # 1 1059
    # 3 1089
    # 4 1069
    # 2 1136

    #thread = rust_murmer3(client_id)
    # 3 1080
    # 1 1084
    # 2 1088
    # 4 1101

    #thread = no_hash(client_id)
    # 1 1089
    # 2 1088
    # 3 1088
    # 4 1088

    # Group the id under its thread, creating the bucket on first use.
    balance.setdefault(thread, []).append(client_id)

# Results: the full mapping first, then how many ids each thread received.
print()
print("Thread Balance Array")
print("--------------------")
print(balance)
print()
print("Thread Counts")
for k, v in balance.items():
    print(k, len(v))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Takes integer client_ids and balances them across {max_threads} threads. You don't need a hash if your client_ids are already integers; just use
thread = abs(client_id % max_threads) + 1
but if your values are strings, murmur2 is a great, quick hash.