Created
February 3, 2017 17:27
-
-
Save anonymous/929d799d5f80794b293783acb9108992 to your computer and use it in GitHub Desktop.
Simulation of the CRUSH Multi Pick Anomaly
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import random | |
from math import sqrt, log, pow | |
NPGS = 100000 # num PGs | |
R = 3 # num replicas | |
# OSDs: (id, weight) | |
# TODO: everything below is hard coded for exactly 4 OSDs | |
osds = [(0, 3), (1, 3), (2, 3), (3, 1)] | |
weights = {} # OSD weights.. we might tweak these round by round | |
orig_weights = {} # OSD weights. Read only. | |
total_weight = 0 | |
for id, w in osds: | |
weights[id] = w | |
orig_weights[id] = w | |
total_weight += w | |
print 'OSDs (id: weight):', weights | |
# compute the expected # PGs for our 4 OSDs | |
expected = { | |
0: NPGS * R * weights[0]/total_weight, | |
1: NPGS * R * weights[1]/total_weight, | |
2: NPGS * R * weights[2]/total_weight, | |
3: NPGS * R * weights[3]/total_weight, | |
} | |
print '\nExpected PGs per OSD: ', expected | |
# initialize PG and PG-by-replicanum counters for each OSD | |
pgs = { | |
0: 0, | |
1: 0, | |
2: 0, | |
3: 0, | |
} | |
pgs_by_r = [ | |
dict(pgs), | |
dict(pgs), | |
dict(pgs), | |
] | |
def weighted_choice(choices): | |
total = sum(w for c, w in choices) | |
r = random.uniform(0, total) | |
upto = 0 | |
for c, w in choices: | |
if upto + w >= r: | |
return c | |
upto += w | |
assert False, "Shouldn't get here" | |
print "\nSimulating with existing CRUSH\n" | |
# simple CRUSH | |
# loop over all PGs, select one with weighted choice, remove that option for the next choice. | |
for i in xrange(NPGS): | |
choices = list(osds) | |
for r in xrange(R): | |
pick = weighted_choice(choices) | |
pgs[pick] += 1 | |
pgs_by_r[r][pick] += 1 | |
choices.remove((pick, weights[pick]),) | |
print 'Observed: ', pgs | |
print 'Observed for Nth replica: ', pgs_by_r | |
print "\nNow trying your new algorithm\n" | |
# initialize PG and PG-by-replicanum counters for each OSD | |
pgs = { | |
0: 0, | |
1: 0, | |
2: 0, | |
3: 0, | |
} | |
pgs_by_r = [ | |
dict(pgs), | |
dict(pgs), | |
dict(pgs), | |
] | |
# modified CRUSH | |
# loop over each PG. | |
# After each replica selection, adjust the remaining choices' weights with your magic value. | |
for i in xrange(NPGS): | |
choices = list(osds) | |
for r in xrange(R): | |
total_weight_start_of_round = float(sum(w for c, w in choices)) # keep track of the original sum of all weights | |
#print choices | |
pick = weighted_choice(choices) # choose a OSD for this round | |
pgs[pick] += 1 # inc the chosen OSD's PG counter | |
pgs_by_r[r][pick] += 1 # inc the chosen OSD's PG-by-R counter | |
new_choices = [i for i in choices if i[0] != pick] # make a list of new choices, dropping the selected one | |
choices = new_choices | |
total_weight_next_round = float(sum(w for c, w in choices)) # calculate the sum of remaining OSD weights | |
# reweight if we have more than one choice left | |
if len(choices) > 1: | |
new_choices = [] | |
for id, w in choices: | |
# put your bright idea for adjusting the OSD weights here: | |
# new_weight = f(w, orig_weights[id], r, R, total_weight, total_weight_start_of_round, total_weight_next_round) | |
# | |
# thoughts: | |
# | |
# it seems clear that the weight of the small OSDs should become relatively smaller than the large OSDs for each round. | |
# also f cannot be a constant factor on w -- this doesn't change the distribution at all. | |
# | |
# ideas: | |
# new_weight = w # this is the current implementation | |
# new_weight = orig_weights[id] # this is also the current implementation | |
# | |
# new_weight = w * total_weight_start_of_round / total_weight_next_round # useless because same factor applied to all weights | |
# | |
# new_weight = w / (total_weight_start_of_round - w) # Sage's idea. kinda works for 3,3,3,1 example, not really for 1,3,3,1 | |
# | |
# new_weight = w*w # this goes in the right direction but is too aggressive | |
# new_weight = w*sqrt(w) # getting closer, but still too agressive | |
# new_weight = pow(w, sqrt(total_weight_start_of_round / total_weight_next_round)) # utter craziness, almost works | |
# | |
# None of above are continue working if we set OSD weights to 1,3,3,1 -- this is a hard problem. | |
# | |
new_weight = w / (total_weight_start_of_round - w) | |
assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round) | |
new_choices.append((id, new_weight),) | |
weights[id] = new_weight | |
choices = new_choices | |
print 'Observed: ', pgs | |
print 'Observed for Nth replica: ', pgs_by_r |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment