hashbrowncipher · September 4, 2019 02:08
diff --git a/tokens.py b/tokens.py
 from fractions import Fraction as f


 def factors_of_two(n):
    return n & ~(n - 1)


 def _tokens_slice(count, zones):
    # We cannot have zones be a multiple of two and also have tokens with
    # equal spacing from each other.
    # To sketch a proof of why this must be, assume that we are deploying in
    # two zones, with one node per zone.
    # In that case, equally spaced nodes would look like:
    # Zone A: 0 degrees
    # Zone B: 180 degrees
    # To double such a cluster while keeping equal node spacing, the allocation
    # must become:
    # Zone A: 0 degrees, 180 degrees
    # Zone B: 90 degrees, 270 degrees
    # But this allocates a zone A node at 180 degrees, where there is already a
    # zone B node. And this moves the two zone B nodes to locations where there
    # were previously none.

    # An alternative might be to give up on equal node spacing. But that would
    # have (at best) limited benefit.
    # Zone A: 0 degrees
    # Zone B: 1 degree
    # Zone C: 2 degrees
    # Zone D: 3 degrees

    # This essentially mandates RF=4, because a lower RF would result in most
    # of the data in the cluster being allocated to Zone A, B, and C, with only
    # data in the range (0, 3] being allocated onto Zone D.
    if len(zones) % 2 == 0:
        raise ValueError("Cannot create tokens for an even number of zones")

    # The generation algorithm is designed to produce the same outcome no
    # matter whether tokens are generated by initialization or by doubling, so
    # we "lay out" the initial set of tokens as if we were starting from a
    # cluster with no factors of two in it.
    divisor = len(zones) * count / factors_of_two(count)

    bound = f(1, count)
    for i, zone in enumerate(zones):
        yield f(i, divisor) % bound, zone


 def tokens(count, zones):
    single_slice = list(_tokens_slice(count, zones))
    divisor = f(1, count)
    for i in range(count):
        base = i * divisor
        for t, zone in single_slice:
            yield base + t, zone


 if __name__ == "__main__":

    def helper(*args):
        return sorted(_tokens_slice(*args))

    assert factors_of_two(0) == 0
    assert factors_of_two(1) == 1
    assert factors_of_two(2) == 2
    assert factors_of_two(3) == 1

    assert helper(1, "abc") == [(f(0, 3), "a"), (f(1, 3), "b"), (f(2, 3), "c")]
    assert helper(2, "abc") == [(f(0, 6), "a"), (f(1, 6), "c"), (f(2, 6), "b")]
    assert helper(3, "abc") == [(f(0, 9), "a"), (f(1, 9), "b"), (f(2, 9), "c")]
    assert helper(4, "abc") == [(f(0, 12), "a"), (f(1, 12), "b"), (f(2, 12), "c")]
    assert helper(5, "abc") == [(f(0, 15), "a"), (f(1, 15), "b"), (f(2, 15), "c")]
    assert helper(6, "abc") == [(f(0, 18), "a"), (f(1, 18), "c"), (f(2, 18), "b")]
    assert helper(8, "abc") == [(f(0, 24), "a"), (f(1, 24), "c"), (f(1, 12), "b")]
    assert helper(20, "abc") == [(f(0, 60), "a"), (f(1, 60), "b"), (f(2, 60), "c")]

    assert helper(1, "abcde") == [
        (f(0, 5), "a"),
        (f(1, 5), "b"),
        (f(2, 5), "c"),
        (f(3, 5), "d"),
        (f(4, 5), "e"),
    ]
    assert helper(2, "abcde") == [
        (f(0, 10), "a"),
        (f(1, 10), "d"),
        (f(2, 10), "b"),
        (f(3, 10), "e"),
        (f(4, 10), "c"),
    ]
    assert helper(3, "abcde") == [
        (f(0, 15), "a"),
        (f(1, 15), "b"),
        (f(2, 15), "c"),
        (f(3, 15), "d"),
        (f(4, 15), "e"),
    ]
    assert helper(4, "abcde") == [
        (f(0, 20), "a"),
        (f(1, 20), "e"),
        (f(2, 20), "d"),
        (f(3, 20), "c"),
        (f(4, 20), "b"),
    ]
    assert helper(8, "abcde") == [
        (f(0, 40), "a"),
        (f(1, 40), "c"),
        (f(2, 40), "e"),
        (f(3, 40), "b"),
        (f(4, 40), "d"),
    ]
    assert helper(16, "abcde") == [
        (f(0, 80), "a"),
        (f(1, 80), "b"),
        (f(2, 80), "c"),
        (f(3, 80), "d"),
        (f(4, 80), "e"),
    ]

    assert sorted(tokens(2, "abc")) == [
        (f(0, 6), "a"),
        (f(1, 6), "c"),
        (f(2, 6), "b"),
        (f(3, 6), "a"),
        (f(4, 6), "c"),
        (f(5, 6), "b"),
    ]
	from fractions import Fraction as f


	def factors_of_two(n):
	return n & ~(n - 1)


	def _tokens_slice(count, zones):
	# We cannot have zones be a multiple of two and also have tokens with
	# equal spacing from each other.
	# To sketch a proof of why this must be, assume that we are deploying in
	# two zones, with one node per zone.
	# In that case, equally spaced nodes would look like:
	# Zone A: 0 degrees
	# Zone B: 180 degrees
	# To double such a cluster while keeping equal node spacing, the allocation
	# must become:
	# Zone A: 0 degrees, 180 degrees
	# Zone B: 90 degrees, 270 degrees
	# But this allocates a zone A node at 180 degrees, where there is already a
	# zone B node. And this moves the two zone B nodes to locations where there
	# were previously none.

	# An alternative might be to give up on equal node spacing. But that would
	# have (at best) limited benefit.
	# Zone A: 0 degrees
	# Zone B: 1 degree
	# Zone C: 2 degrees
	# Zone D: 3 degrees

	# This essentially mandates RF=4, because a lower RF would result in most
	# of the data in the cluster being allocated to Zone A, B, and C, with only
	# data in the range (0, 3] being allocated onto Zone D.
	if len(zones) % 2 == 0:
	raise ValueError("Cannot create tokens for an even number of zones")

	# The generation algorithm is designed to produce the same outcome no
	# matter whether tokens are generated by initialization or by doubling, so
	# we "lay out" the initial set of tokens as if we were starting from a
	# cluster with no factors of two in it.
	divisor = len(zones) * count / factors_of_two(count)

	bound = f(1, count)
	for i, zone in enumerate(zones):
	yield f(i, divisor) % bound, zone


	def tokens(count, zones):
	single_slice = list(_tokens_slice(count, zones))
	divisor = f(1, count)
	for i in range(count):
	base = i * divisor
	for t, zone in single_slice:
	yield base + t, zone


	if __name__ == "__main__":

	def helper(*args):
	return sorted(_tokens_slice(*args))

	assert factors_of_two(0) == 0
	assert factors_of_two(1) == 1
	assert factors_of_two(2) == 2
	assert factors_of_two(3) == 1

	assert helper(1, "abc") == [(f(0, 3), "a"), (f(1, 3), "b"), (f(2, 3), "c")]
	assert helper(2, "abc") == [(f(0, 6), "a"), (f(1, 6), "c"), (f(2, 6), "b")]
	assert helper(3, "abc") == [(f(0, 9), "a"), (f(1, 9), "b"), (f(2, 9), "c")]
	assert helper(4, "abc") == [(f(0, 12), "a"), (f(1, 12), "b"), (f(2, 12), "c")]
	assert helper(5, "abc") == [(f(0, 15), "a"), (f(1, 15), "b"), (f(2, 15), "c")]
	assert helper(6, "abc") == [(f(0, 18), "a"), (f(1, 18), "c"), (f(2, 18), "b")]
	assert helper(8, "abc") == [(f(0, 24), "a"), (f(1, 24), "c"), (f(1, 12), "b")]
	assert helper(20, "abc") == [(f(0, 60), "a"), (f(1, 60), "b"), (f(2, 60), "c")]

	assert helper(1, "abcde") == [
	(f(0, 5), "a"),
	(f(1, 5), "b"),
	(f(2, 5), "c"),
	(f(3, 5), "d"),
	(f(4, 5), "e"),
	]
	assert helper(2, "abcde") == [
	(f(0, 10), "a"),
	(f(1, 10), "d"),
	(f(2, 10), "b"),
	(f(3, 10), "e"),
	(f(4, 10), "c"),
	]
	assert helper(3, "abcde") == [
	(f(0, 15), "a"),
	(f(1, 15), "b"),
	(f(2, 15), "c"),
	(f(3, 15), "d"),
	(f(4, 15), "e"),
	]
	assert helper(4, "abcde") == [
	(f(0, 20), "a"),
	(f(1, 20), "e"),
	(f(2, 20), "d"),
	(f(3, 20), "c"),
	(f(4, 20), "b"),
	]
	assert helper(8, "abcde") == [
	(f(0, 40), "a"),
	(f(1, 40), "c"),
	(f(2, 40), "e"),
	(f(3, 40), "b"),
	(f(4, 40), "d"),
	]
	assert helper(16, "abcde") == [
	(f(0, 80), "a"),
	(f(1, 80), "b"),
	(f(2, 80), "c"),
	(f(3, 80), "d"),
	(f(4, 80), "e"),
	]

	assert sorted(tokens(2, "abc")) == [
	(f(0, 6), "a"),
	(f(1, 6), "c"),
	(f(2, 6), "b"),
	(f(3, 6), "a"),
	(f(4, 6), "c"),
	(f(5, 6), "b"),
	]