Skip to content

Instantly share code, notes, and snippets.

Created February 3, 2017 17:27
Show Gist options
  • Save anonymous/929d799d5f80794b293783acb9108992 to your computer and use it in GitHub Desktop.
Save anonymous/929d799d5f80794b293783acb9108992 to your computer and use it in GitHub Desktop.
Simulation of the CRUSH Multi Pick Anomaly
#!/usr/bin/env python
import random
from math import sqrt, log, pow
NPGS = 100000 # num PGs
R = 3 # num replicas
# OSDs: (id, weight)
# TODO: everything below is hard coded for exactly 4 OSDs
osds = [(0, 3), (1, 3), (2, 3), (3, 1)]
weights = {} # OSD weights.. we might tweak these round by round
orig_weights = {} # OSD weights. Read only.
total_weight = 0
for id, w in osds:
weights[id] = w
orig_weights[id] = w
total_weight += w
print 'OSDs (id: weight):', weights
# compute the expected # PGs for our 4 OSDs
expected = {
0: NPGS * R * weights[0]/total_weight,
1: NPGS * R * weights[1]/total_weight,
2: NPGS * R * weights[2]/total_weight,
3: NPGS * R * weights[3]/total_weight,
}
print '\nExpected PGs per OSD: ', expected
# initialize PG and PG-by-replicanum counters for each OSD
pgs = {
0: 0,
1: 0,
2: 0,
3: 0,
}
pgs_by_r = [
dict(pgs),
dict(pgs),
dict(pgs),
]
def weighted_choice(choices):
total = sum(w for c, w in choices)
r = random.uniform(0, total)
upto = 0
for c, w in choices:
if upto + w >= r:
return c
upto += w
assert False, "Shouldn't get here"
print "\nSimulating with existing CRUSH\n"
# simple CRUSH
# loop over all PGs, select one with weighted choice, remove that option for the next choice.
for i in xrange(NPGS):
choices = list(osds)
for r in xrange(R):
pick = weighted_choice(choices)
pgs[pick] += 1
pgs_by_r[r][pick] += 1
choices.remove((pick, weights[pick]),)
print 'Observed: ', pgs
print 'Observed for Nth replica: ', pgs_by_r
print "\nNow trying your new algorithm\n"
# initialize PG and PG-by-replicanum counters for each OSD
pgs = {
0: 0,
1: 0,
2: 0,
3: 0,
}
pgs_by_r = [
dict(pgs),
dict(pgs),
dict(pgs),
]
# modified CRUSH
# loop over each PG.
# After each replica selection, adjust the remaining choices' weights with your magic value.
for i in xrange(NPGS):
choices = list(osds)
for r in xrange(R):
total_weight_start_of_round = float(sum(w for c, w in choices)) # keep track of the original sum of all weights
#print choices
pick = weighted_choice(choices) # choose a OSD for this round
pgs[pick] += 1 # inc the chosen OSD's PG counter
pgs_by_r[r][pick] += 1 # inc the chosen OSD's PG-by-R counter
new_choices = [i for i in choices if i[0] != pick] # make a list of new choices, dropping the selected one
choices = new_choices
total_weight_next_round = float(sum(w for c, w in choices)) # calculate the sum of remaining OSD weights
# reweight if we have more than one choice left
if len(choices) > 1:
new_choices = []
for id, w in choices:
# put your bright idea for adjusting the OSD weights here:
# new_weight = f(w, orig_weights[id], r, R, total_weight, total_weight_start_of_round, total_weight_next_round)
#
# thoughts:
#
# it seems clear that the weight of the small OSDs should become relatively smaller than the large OSDs for each round.
# also f cannot be a constant factor on w -- this doesn't change the distribution at all.
#
# ideas:
# new_weight = w # this is the current implementation
# new_weight = orig_weights[id] # this is also the current implementation
#
# new_weight = w * total_weight_start_of_round / total_weight_next_round # useless because same factor applied to all weights
#
# new_weight = w / (total_weight_start_of_round - w) # Sage's idea. kinda works for 3,3,3,1 example, not really for 1,3,3,1
#
# new_weight = w*w # this goes in the right direction but is too aggressive
# new_weight = w*sqrt(w) # getting closer, but still too agressive
# new_weight = pow(w, sqrt(total_weight_start_of_round / total_weight_next_round)) # utter craziness, almost works
#
# None of above are continue working if we set OSD weights to 1,3,3,1 -- this is a hard problem.
#
new_weight = w / (total_weight_start_of_round - w)
assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round)
new_choices.append((id, new_weight),)
weights[id] = new_weight
choices = new_choices
print 'Observed: ', pgs
print 'Observed for Nth replica: ', pgs_by_r
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment