Created
September 4, 2019 02:08
-
-
Save hashbrowncipher/ab9be812e33b8366648af6a0c57019ab to your computer and use it in GitHub Desktop.
Cassandra tokens calculator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fractions import Fraction as f | |
def factors_of_two(n): | |
return n & ~(n - 1) | |
def _tokens_slice(count, zones): | |
# We cannot have zones be a multiple of two and also have tokens with | |
# equal spacing from each other. | |
# To sketch a proof of why this must be, assume that we are deploying in | |
# two zones, with one node per zone. | |
# In that case, equally spaced nodes would look like: | |
# Zone A: 0 degrees | |
# Zone B: 180 degrees | |
# To double such a cluster while keeping equal node spacing, the allocation | |
# must become: | |
# Zone A: 0 degrees, 180 degrees | |
# Zone B: 90 degrees, 270 degrees | |
# But this allocates a zone A node at 180 degrees, where there is already a | |
# zone B node. And this moves the two zone B nodes to locations where there | |
# were previously none. | |
# An alternative might be to give up on equal node spacing. But that would | |
# have (at best) limited benefit. | |
# Zone A: 0 degrees | |
# Zone B: 1 degree | |
# Zone C: 2 degrees | |
# Zone D: 3 degrees | |
# This essentially mandates RF=4, because a lower RF would result in most | |
# of the data in the cluster being allocated to Zone A, B, and C, with only | |
# data in the range (0, 3] being allocated onto Zone D. | |
if len(zones) % 2 == 0: | |
raise ValueError("Cannot create tokens for an even number of zones") | |
# The generation algorithm is designed to produce the same outcome no | |
# matter whether tokens are generated by initialization or by doubling, so | |
# we "lay out" the initial set of tokens as if we were starting from a | |
# cluster with no factors of two in it. | |
divisor = len(zones) * count / factors_of_two(count) | |
bound = f(1, count) | |
for i, zone in enumerate(zones): | |
yield f(i, divisor) % bound, zone | |
def tokens(count, zones): | |
single_slice = list(_tokens_slice(count, zones)) | |
divisor = f(1, count) | |
for i in range(count): | |
base = i * divisor | |
for t, zone in single_slice: | |
yield base + t, zone | |
if __name__ == "__main__": | |
def helper(*args): | |
return sorted(_tokens_slice(*args)) | |
assert factors_of_two(0) == 0 | |
assert factors_of_two(1) == 1 | |
assert factors_of_two(2) == 2 | |
assert factors_of_two(3) == 1 | |
assert helper(1, "abc") == [(f(0, 3), "a"), (f(1, 3), "b"), (f(2, 3), "c")] | |
assert helper(2, "abc") == [(f(0, 6), "a"), (f(1, 6), "c"), (f(2, 6), "b")] | |
assert helper(3, "abc") == [(f(0, 9), "a"), (f(1, 9), "b"), (f(2, 9), "c")] | |
assert helper(4, "abc") == [(f(0, 12), "a"), (f(1, 12), "b"), (f(2, 12), "c")] | |
assert helper(5, "abc") == [(f(0, 15), "a"), (f(1, 15), "b"), (f(2, 15), "c")] | |
assert helper(6, "abc") == [(f(0, 18), "a"), (f(1, 18), "c"), (f(2, 18), "b")] | |
assert helper(8, "abc") == [(f(0, 24), "a"), (f(1, 24), "c"), (f(1, 12), "b")] | |
assert helper(20, "abc") == [(f(0, 60), "a"), (f(1, 60), "b"), (f(2, 60), "c")] | |
assert helper(1, "abcde") == [ | |
(f(0, 5), "a"), | |
(f(1, 5), "b"), | |
(f(2, 5), "c"), | |
(f(3, 5), "d"), | |
(f(4, 5), "e"), | |
] | |
assert helper(2, "abcde") == [ | |
(f(0, 10), "a"), | |
(f(1, 10), "d"), | |
(f(2, 10), "b"), | |
(f(3, 10), "e"), | |
(f(4, 10), "c"), | |
] | |
assert helper(3, "abcde") == [ | |
(f(0, 15), "a"), | |
(f(1, 15), "b"), | |
(f(2, 15), "c"), | |
(f(3, 15), "d"), | |
(f(4, 15), "e"), | |
] | |
assert helper(4, "abcde") == [ | |
(f(0, 20), "a"), | |
(f(1, 20), "e"), | |
(f(2, 20), "d"), | |
(f(3, 20), "c"), | |
(f(4, 20), "b"), | |
] | |
assert helper(8, "abcde") == [ | |
(f(0, 40), "a"), | |
(f(1, 40), "c"), | |
(f(2, 40), "e"), | |
(f(3, 40), "b"), | |
(f(4, 40), "d"), | |
] | |
assert helper(16, "abcde") == [ | |
(f(0, 80), "a"), | |
(f(1, 80), "b"), | |
(f(2, 80), "c"), | |
(f(3, 80), "d"), | |
(f(4, 80), "e"), | |
] | |
assert sorted(tokens(2, "abc")) == [ | |
(f(0, 6), "a"), | |
(f(1, 6), "c"), | |
(f(2, 6), "b"), | |
(f(3, 6), "a"), | |
(f(4, 6), "c"), | |
(f(5, 6), "b"), | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment