Last active
August 18, 2022 14:59
-
-
Save reorx/8470123 to your computer and use it in GitHub Desktop.
Consistent hash implementation in Python.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
hash_ring | |
~~~~~~~~~~~~~~ | |
Implements consistent hashing that can be used when | |
the number of server nodes can increase or decrease (like in memcached). | |
Consistent hashing is a scheme that provides a hash table functionality | |
in a way that the adding or removing of one slot | |
does not significantly change the mapping of keys to slots. | |
More information about consistent hashing can be read in these articles: | |
"Web Caching with Consistent Hashing": | |
http://www8.org/w8-papers/2a-webserver/caching/paper2.html | |
"Consistent hashing and random trees: | |
Distributed caching protocols for relieving hot spots on the World Wide Web (1997)": | |
http://citeseerx.ist.psu.edu/legacymapper?did=38148 | |
Example of usage:: | |
memcache_servers = ['192.168.0.246:11212', | |
'192.168.0.247:11212', | |
'192.168.0.249:11212'] | |
ring = HashRing(memcache_servers) | |
server = ring.get_node('my_key') | |
:copyright: 2008 by Amir Salihefendic. | |
:license: BSD | |
""" | |
import math | |
import sys | |
from bisect import bisect | |
if sys.version_info >= (2, 5): | |
import hashlib | |
md5_constructor = hashlib.md5 | |
else: | |
import md5 | |
md5_constructor = md5.new | |
class HashRing(object): | |
def __init__(self, nodes=None, weights=None): | |
"""`nodes` is a list of objects that have a proper __str__ representation. | |
`weights` is dictionary that sets weights to the nodes. The default | |
weight is that all nodes are equal. | |
""" | |
self.ring = dict() | |
self._sorted_keys = [] | |
self.nodes = nodes | |
if not weights: | |
weights = {} | |
self.weights = weights | |
self._generate_circle() | |
def _generate_circle(self): | |
"""Generates the circle. | |
""" | |
total_weight = 0 | |
for node in self.nodes: | |
total_weight += self.weights.get(node, 1) | |
for node in self.nodes: | |
weight = 1 | |
if node in self.weights: | |
weight = self.weights.get(node) | |
factor = math.floor((40*len(self.nodes)*weight) / total_weight); | |
for j in xrange(0, int(factor)): | |
b_key = self._hash_digest( '%s-%s' % (node, j) ) | |
for i in xrange(0, 3): | |
key = self._hash_val(b_key, lambda x: x+i*4) | |
self.ring[key] = node | |
self._sorted_keys.append(key) | |
self._sorted_keys.sort() | |
def get_node(self, string_key): | |
"""Given a string key a corresponding node in the hash ring is returned. | |
If the hash ring is empty, `None` is returned. | |
""" | |
pos = self.get_node_pos(string_key) | |
if pos is None: | |
return None | |
return self.ring[ self._sorted_keys[pos] ] | |
def get_node_pos(self, string_key): | |
"""Given a string key a corresponding node in the hash ring is returned | |
along with it's position in the ring. | |
If the hash ring is empty, (`None`, `None`) is returned. | |
""" | |
if not self.ring: | |
return None | |
key = self.gen_key(string_key) | |
nodes = self._sorted_keys | |
pos = bisect(nodes, key) | |
if pos == len(nodes): | |
return 0 | |
else: | |
return pos | |
def iterate_nodes(self, string_key, distinct=True): | |
"""Given a string key it returns the nodes as a generator that can hold the key. | |
The generator iterates one time through the ring | |
starting at the correct position. | |
if `distinct` is set, then the nodes returned will be unique, | |
i.e. no virtual copies will be returned. | |
""" | |
if not self.ring: | |
yield None, None | |
returned_values = set() | |
def distinct_filter(value): | |
if str(value) not in returned_values: | |
returned_values.add(str(value)) | |
return value | |
pos = self.get_node_pos(string_key) | |
for key in self._sorted_keys[pos:]: | |
val = distinct_filter(self.ring[key]) | |
if val: | |
yield val | |
for i, key in enumerate(self._sorted_keys): | |
if i < pos: | |
val = distinct_filter(self.ring[key]) | |
if val: | |
yield val | |
def gen_key(self, key): | |
"""Given a string key it returns a long value, | |
this long value represents a place on the hash ring. | |
md5 is currently used because it mixes well. | |
""" | |
b_key = self._hash_digest(key) | |
return self._hash_val(b_key, lambda x: x) | |
def _hash_val(self, b_key, entry_fn): | |
return (( b_key[entry_fn(3)] << 24) | |
|(b_key[entry_fn(2)] << 16) | |
|(b_key[entry_fn(1)] << 8) | |
| b_key[entry_fn(0)] ) | |
def _hash_digest(self, key): | |
m = md5_constructor() | |
m.update(key) | |
return map(ord, m.digest()) | |
#################### | |
# memcache_ring.py # | |
#################### | |
import memcache | |
import types | |
from hash_ring import HashRing | |
class MemcacheRing(memcache.Client): | |
"""Extends python-memcache so it uses consistent hashing to | |
distribute the keys. | |
""" | |
def __init__(self, servers, *k, **kw): | |
self.hash_ring = HashRing(servers) | |
memcache.Client.__init__(self, servers, *k, **kw) | |
self.server_mapping = {} | |
for server_uri, server_obj in zip(servers, self.servers): | |
self.server_mapping[server_uri] = server_obj | |
def _get_server(self, key): | |
if type(key) == types.TupleType: | |
return memcache.Client._get_server(key) | |
for i in range(self._SERVER_RETRIES): | |
iterator = self.hash_ring.iterate_nodes(key) | |
for server_uri in iterator: | |
server_obj = self.server_mapping[server_uri] | |
if server_obj.connect(): | |
return server_obj, key | |
return None, None |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment