Skip to content

Instantly share code, notes, and snippets.

@mreschke
Last active June 5, 2025 22:59
Show Gist options
  • Save mreschke/099154a9c064d9b706e1beedd983c254 to your computer and use it in GitHub Desktop.
Save mreschke/099154a9c064d9b706e1beedd983c254 to your computer and use it in GitHub Desktop.
Thread Load Balancing (murmur vs int)
import struct
from murmurhash2 import murmurhash2, murmurhash3
# pip install murmurhash2
def murmur_hash_2(data, seed=0):
    """Return the 32-bit MurmurHash3 (x86_32) digest of ``data``.

    Despite the name (kept so existing callers keep working), the mixing
    constants and finalizer used here are those of MurmurHash3 x86_32,
    not MurmurHash2.

    Every intermediate value is truncated to 32 bits. Python integers are
    unbounded, so without the truncation the ``<<``/``>>`` pairs are not
    real rotations and the result is not a Murmur digest. Values returned
    by the earlier, untruncated version of this function therefore differ
    from the ones returned here.

    Args:
        data: Bytes-like object to hash (indexing must yield ints).
        seed: Integer seed; only its low 32 bits are used.

    Returns:
        The hash as an unsigned integer in ``range(2 ** 32)``.
    """
    mask = 0xFFFFFFFF
    c1 = 0xcc9e2d51
    c2 = 0x1b873593

    def rotl32(value, shift):
        # 32-bit left rotation; `value` must already fit in 32 bits.
        return ((value << shift) | (value >> (32 - shift))) & mask

    length = len(data)
    nblocks = length // 4
    h1 = seed & mask

    # Body: mix each complete 4-byte block, read as little-endian so the
    # digest does not depend on the host byte order.
    for i in range(nblocks):
        k1 = struct.unpack_from('<I', data, i * 4)[0]
        k1 = (k1 * c1) & mask
        k1 = (rotl32(k1, 15) * c2) & mask
        h1 ^= k1
        h1 = (rotl32(h1, 13) * 5 + 0xe6546b64) & mask

    # Tail: fold the remaining 0-3 bytes into one word, then mix it once.
    # With no tail bytes k1 stays 0 and the XOR below is a no-op.
    tail_index = nblocks * 4
    k1 = 0
    for i in range(length % 4):
        k1 ^= data[tail_index + i] << (i * 8)
    k1 = (k1 * c1) & mask
    k1 = (rotl32(k1, 15) * c2) & mask
    h1 ^= k1

    # Finalization: fold in the length and avalanche the bits.
    h1 ^= length & mask
    h1 ^= h1 >> 16
    h1 = (h1 * 0x85ebca6b) & mask
    h1 ^= h1 >> 13
    h1 = (h1 * 0xc2b2ae35) & mask
    h1 ^= h1 >> 16
    return h1
def no_hash(client_id):
    """Pick a thread for ``client_id`` by plain modulo, without hashing.

    Reads the module-level ``max_threads``, prints the id together with
    the chosen thread, and returns ``abs(client_id % max_threads) + 1``.
    """
    # Skipping the hash only works because our client_ids are already
    # integers; hashing is only needed when strings must first be turned
    # into integers before balancing.
    remainder = client_id % max_threads
    thread_number = abs(remainder) + 1
    print(client_id, 'thread', thread_number)
    return thread_number
def python_murmer(client_id):
    """Pick a thread for ``client_id`` using the pure-Python hash.

    Builds the key ``'ClientID:<client_id>'``, hashes its encoded bytes
    with ``murmur_hash_2`` and the module-level ``seed``, reduces the hash
    modulo the module-level ``max_threads`` and adds one. Prints the key,
    the hash and the chosen thread, then returns the thread.
    """
    key = 'ClientID:' + str(client_id)
    digest = murmur_hash_2(key.encode(), seed)
    thread_number = abs(digest % max_threads) + 1
    print(key, 'murmer', digest, 'thread', thread_number)
    return thread_number
def rust_murmer2(client_id):
    """Pick a thread for ``client_id`` using the imported ``murmurhash2``.

    Builds the key ``'ClientID:<client_id>'``, hashes its encoded bytes
    with ``murmurhash2`` and the module-level ``seed``, reduces the hash
    modulo the module-level ``max_threads`` and adds one. Prints the key,
    the hash and the chosen thread, then returns the thread.
    """
    key = 'ClientID:' + str(client_id)
    digest = murmurhash2(key.encode(), seed)
    thread_number = abs(digest % max_threads) + 1
    print(key, 'murmer', digest, 'thread', thread_number)
    return thread_number
def rust_murmer3(client_id):
    """Pick a thread for ``client_id`` using the imported ``murmurhash3``.

    Builds the key ``'ClientID:<client_id>'``, hashes its encoded bytes
    with ``murmurhash3`` and the module-level ``seed``, reduces the hash
    modulo the module-level ``max_threads`` and adds one. Prints the key,
    the hash and the chosen thread, then returns the thread.
    """
    key = 'ClientID:' + str(client_id)
    digest = murmurhash3(key.encode(), seed)
    thread_number = abs(digest % max_threads) + 1
    print(key, 'murmer', digest, 'thread', thread_number)
    return thread_number
# Parameters
max_threads = 4      # number of threads the ids are spread across
seed = 2016458       # seed handed to every hash function
balance = {}         # thread number -> list of client_ids assigned to it

# Fake client_ids
# Exactly one strategy is active at a time; the others stay commented out.
# The numbers listed under each strategy look like the "thread count"
# output the author recorded for it.
for client_id in range(1000, 5353):
    thread = python_murmer(client_id)
    # 1 1094
    # 4 1138
    # 2 1086
    # 3 1035

    #thread = rust_murmer2(client_id)
    # 1 1059
    # 3 1089
    # 4 1069
    # 2 1136

    #thread = rust_murmer3(client_id)
    # 3 1080
    # 1 1084
    # 2 1088
    # 4 1101

    #thread = no_hash(client_id)
    # 1 1089
    # 2 1088
    # 3 1088
    # 4 1088

    # Add to balance List
    balance.setdefault(thread, []).append(client_id)

# Results
print()
print("Thread Balance Array")
print("--------------------")
print(balance)
print()
print("Thread Counts")
for thread_number, assigned_ids in balance.items():
    print(thread_number, len(assigned_ids))
@mreschke
Copy link
Author

mreschke commented Jun 5, 2025

Used to take client_id integers and balance them across {max_threads} threads. You don't need a hash if your client_ids are already integers; just use thread = abs(client_id % max_threads) + 1. If your values are strings, however, murmur2 is a great, quick hash.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment