Skip to content

Instantly share code, notes, and snippets.

@hashbrowncipher
Created September 4, 2019 02:08
Show Gist options
  • Save hashbrowncipher/ab9be812e33b8366648af6a0c57019ab to your computer and use it in GitHub Desktop.
Save hashbrowncipher/ab9be812e33b8366648af6a0c57019ab to your computer and use it in GitHub Desktop.
Cassandra tokens calculator
from fractions import Fraction as f
def factors_of_two(n):
return n & ~(n - 1)
def _tokens_slice(count, zones):
# We cannot have zones be a multiple of two and also have tokens with
# equal spacing from each other.
# To sketch a proof of why this must be, assume that we are deploying in
# two zones, with one node per zone.
# In that case, equally spaced nodes would look like:
# Zone A: 0 degrees
# Zone B: 180 degrees
# To double such a cluster while keeping equal node spacing, the allocation
# must become:
# Zone A: 0 degrees, 180 degrees
# Zone B: 90 degrees, 270 degrees
# But this allocates a zone A node at 180 degrees, where there is already a
# zone B node. And this moves the two zone B nodes to locations where there
# were previously none.
# An alternative might be to give up on equal node spacing. But that would
# have (at best) limited benefit.
# Zone A: 0 degrees
# Zone B: 1 degree
# Zone C: 2 degrees
# Zone D: 3 degrees
# This essentially mandates RF=4, because a lower RF would result in most
# of the data in the cluster being allocated to Zone A, B, and C, with only
# data in the range (0, 3] being allocated onto Zone D.
if len(zones) % 2 == 0:
raise ValueError("Cannot create tokens for an even number of zones")
# The generation algorithm is designed to produce the same outcome no
# matter whether tokens are generated by initialization or by doubling, so
# we "lay out" the initial set of tokens as if we were starting from a
# cluster with no factors of two in it.
divisor = len(zones) * count / factors_of_two(count)
bound = f(1, count)
for i, zone in enumerate(zones):
yield f(i, divisor) % bound, zone
def tokens(count, zones):
single_slice = list(_tokens_slice(count, zones))
divisor = f(1, count)
for i in range(count):
base = i * divisor
for t, zone in single_slice:
yield base + t, zone
if __name__ == "__main__":
def helper(*args):
return sorted(_tokens_slice(*args))
assert factors_of_two(0) == 0
assert factors_of_two(1) == 1
assert factors_of_two(2) == 2
assert factors_of_two(3) == 1
assert helper(1, "abc") == [(f(0, 3), "a"), (f(1, 3), "b"), (f(2, 3), "c")]
assert helper(2, "abc") == [(f(0, 6), "a"), (f(1, 6), "c"), (f(2, 6), "b")]
assert helper(3, "abc") == [(f(0, 9), "a"), (f(1, 9), "b"), (f(2, 9), "c")]
assert helper(4, "abc") == [(f(0, 12), "a"), (f(1, 12), "b"), (f(2, 12), "c")]
assert helper(5, "abc") == [(f(0, 15), "a"), (f(1, 15), "b"), (f(2, 15), "c")]
assert helper(6, "abc") == [(f(0, 18), "a"), (f(1, 18), "c"), (f(2, 18), "b")]
assert helper(8, "abc") == [(f(0, 24), "a"), (f(1, 24), "c"), (f(1, 12), "b")]
assert helper(20, "abc") == [(f(0, 60), "a"), (f(1, 60), "b"), (f(2, 60), "c")]
assert helper(1, "abcde") == [
(f(0, 5), "a"),
(f(1, 5), "b"),
(f(2, 5), "c"),
(f(3, 5), "d"),
(f(4, 5), "e"),
]
assert helper(2, "abcde") == [
(f(0, 10), "a"),
(f(1, 10), "d"),
(f(2, 10), "b"),
(f(3, 10), "e"),
(f(4, 10), "c"),
]
assert helper(3, "abcde") == [
(f(0, 15), "a"),
(f(1, 15), "b"),
(f(2, 15), "c"),
(f(3, 15), "d"),
(f(4, 15), "e"),
]
assert helper(4, "abcde") == [
(f(0, 20), "a"),
(f(1, 20), "e"),
(f(2, 20), "d"),
(f(3, 20), "c"),
(f(4, 20), "b"),
]
assert helper(8, "abcde") == [
(f(0, 40), "a"),
(f(1, 40), "c"),
(f(2, 40), "e"),
(f(3, 40), "b"),
(f(4, 40), "d"),
]
assert helper(16, "abcde") == [
(f(0, 80), "a"),
(f(1, 80), "b"),
(f(2, 80), "c"),
(f(3, 80), "d"),
(f(4, 80), "e"),
]
assert sorted(tokens(2, "abc")) == [
(f(0, 6), "a"),
(f(1, 6), "c"),
(f(2, 6), "b"),
(f(3, 6), "a"),
(f(4, 6), "c"),
(f(5, 6), "b"),
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment