Measuring the resiliences of DHT networks
cached_memoize.py
from functools import wraps


def _read_memo(fname="memo"):
    # The memo file holds the repr() of a dict mapping argument tuples to
    # results. Start with an empty table if the file doesn't exist yet.
    try:
        with open(fname, "r") as f:
            memo = eval(f.read())
    except FileNotFoundError:
        memo = {}
    return memo


def _write_memo(memo, fname="memo"):
    with open(fname, "w") as f:
        f.write(repr(memo))


def cached_memoize(func):
    # Memoize func, persisting the table to disk so results survive across runs.
    memo = _read_memo()

    @wraps(func)
    def wrapper(*args):
        if args not in memo:
            memo[args] = func(*args)
            _write_memo(memo)
        return memo[args]
    return wrapper
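For reference, a minimal usage sketch of the decorator (slow_fib is a hypothetical stand-in; the plotting scripts below wrap thm_1 the same way):

from cached_memoize import cached_memoize

@cached_memoize
def slow_fib(i):
    # hypothetical stand-in for an expensive pure function
    return i if i < 2 else slow_fib(i - 1) + slow_fib(i - 2)

print(slow_fib(30))  # computed once, then served from the on-disk "memo" file on later runs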
Simulation runner (collects observed resilience measurements)
from multiprocessing import Process, Event
import datetime
import os
import random
import time

from prefixtree import PrefixTreeNode as PrefixTree

# network parameters: n honest peers, an L-bit address space, lookups returning k peers
n = 15000
L = 32
k = 16


def make_addr_set(size):
    addrs = set(random.randrange(2**L) for _ in range(size))
    while len(addrs) < size:
        # in case the initial pass hit any collisions
        addrs |= set(random.randrange(2**L) for _ in range(size - len(addrs)))
    return addrs


def run_test(n, m, samples=20000):
    defenders = make_addr_set(n)
    attackers = make_addr_set(m)
    # note that there is the possibility of overlap between defenders and
    # attackers - this is ok, the model accounts for it
    tree = PrefixTree(L, 50)
    for addr in defenders | attackers:
        tree.insert(addr)
    resilient = 0
    compromised = 0
    for _ in range(samples):
        target = random.randrange(2**L)
        lookup_set = tree.query(target, k)
        # a lookup is resilient if at least one honest peer appears in its result set
        if any(addr in defenders for addr in lookup_set):
            resilient += 1
        else:
            compromised += 1
    return resilient, compromised


def run_tests(m_vals):
    results = {}
    print()
    for i, m in enumerate(m_vals):
        print(f"({i+1}/{len(m_vals)}) m = {m}")
        key = (n, m, L, k)
        results[key] = run_test(n, m)
    print("\nresults:", results)
    return results


def worker(stop_event):
    # keep generating fresh batches of measurements until the main process
    # signals shutdown, writing each batch to a timestamped file under data/
    while not stop_event.is_set():
        m_vals = range(0, 500000+1, 25000)
        results = run_tests(m_vals)
        fname = "data/" + datetime.datetime.now().isoformat()
        with open(fname, "w") as f:
            f.write(repr(results))


if __name__ == "__main__":
    os.makedirs("data", exist_ok=True)
    cpus = os.cpu_count() - 1  # leave one core free for other work
    event = Event()
    workers = [Process(target=worker, args=(event,)) for _ in range(cpus)]
    for p in workers:
        p.start()
    print("Workers started.")
    try:
        while True:
            time.sleep(1)  # idle (rather than busy-wait) until interrupted
    except KeyboardInterrupt:
        print("Wrapping up...")
        event.set()
        for p in workers:
            p.join()
            print("Worker joined.")
    print("Done.")
Plotting script (observed vs. predicted resilience)
from thm1 import thm_1 as _thm_1
from cached_memoize import cached_memoize

import matplotlib.pyplot as plt
import numpy as np
import os

thm_1 = cached_memoize(_thm_1)

n = 15000
L = 32
k = 16


def load_data():
    # merge every results file under data/ into one dict of
    # (n, m, L, k) -> (resilient lookups, compromised lookups)
    d = {}
    for dirent in os.scandir("data/"):
        with open("data/" + dirent.name) as f:
            print("reading", dirent.name)
            results = eval(f.read())
        for key, value in results.items():
            r_old, c_old = d.setdefault(key, (0, 0))
            r_new, c_new = value
            d[key] = (r_old + r_new, c_old + c_new)
    return d


def main(slices=100):
    # plots E[R_{L,k}] with fifteen thousand honest peers as the number of
    # malicious peers ranges from zero to five hundred thousand
    d = load_data()
    print("Data loaded.")
    t = np.arange(0, 500000, 500000/slices)
    t = np.append(t, [500000])
    model_curve = [thm_1(L, n, int(m), k, False) for m in t]  # int(): np.arange yields floats
    print("Model loaded.")
    print(d)
    m_vals = sorted(_m for _n, _m, _L, _k in d if (_L, _n, _k) == (L, n, k))
    measurements = [d[n, m, L, k] for m in m_vals]
    R_vals = [r / (r + c) for r, c in measurements]
    print(m_vals)
    print(R_vals)
    plt.plot(t, model_curve, "g-", label="predicted")
    plt.plot(m_vals, R_vals, "bo", label="observed", markerfacecolor='none')
    plt.legend(loc="lower right")
    plt.axis([0, 500000, 0, 1])
    plt.show()


if __name__ == "__main__":
    main()
Plotting script (predicted resilience curves for several values of k)
from thm1 import thm_1 as _thm_1
from cached_memoize import cached_memoize

import matplotlib.pyplot as plt
import numpy as np

thm_1 = cached_memoize(_thm_1)

L = 32


def main(net_size=5000, slices=100):
    # plots E[R_{L,k}] as a function of the ratio of honest peers to total peers

    def get_curve(xs, k):
        results = []
        for x in xs:
            n = int(round(x * net_size))  # honest peers at this ratio
            m = net_size - n              # the rest are malicious
            results.append(thm_1(L, n, m, k, False))
        return results

    t = np.arange(0, 1, 1/slices)
    t = np.append(t, [1])
    print("Running the numbers...")
    for k, c in zip((2, 4, 8, 16, 32), 'rycbg'):
        plt.plot(t, get_curve(t, k), f"{c}-", label=f"k={k}")
    print("Plotting...")
    plt.legend(loc="lower right")
    plt.axis([0, 1, 0, 1])
    plt.show()


if __name__ == "__main__":
    main()
prefixtree.py
import itertools
import random

# address space parameters used by the make_tree helper below
L = 32
POW_2 = 2**L


class PrefixTreeNode:
    # prefix tree implementation, with size-capped buckets of values at leaves

    def __init__(self, radix: int, bucket_size: int):
        self.bucket_size = bucket_size
        self.radix = radix
        self.bit = 2**radix       # address bit used to route inserts/queries at this node
        self.contents = []        # leaf bucket; set to None once this node splits
        self.left_child = None    # 0 branch
        self.right_child = None   # 1 branch

    def insert(self, addr: int):
        if self.contents is None:
            # interior node: route to the matching child
            if (addr & self.bit) == 0:
                self.left_child.insert(addr)
            else:
                self.right_child.insert(addr)
        else:
            # leaf node: add to the bucket, splitting if it overflows
            self.contents.append(addr)
            if len(self.contents) > self.bucket_size:
                if self.radix == 0:
                    raise Exception("tried to split leaf bucket. duplicate insert?")
                contents = self.contents
                self.contents = None
                self.left_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
                self.right_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
                for addr in contents:
                    self.insert(addr)

    def query(self, addr: int, k: int):
        # returns (approximately) the k closest stored addresses to addr, by XOR distance
        return tuple(itertools.islice(self._query(addr), k))

    def _query(self, addr):
        if self.contents is None:
            # visit the subtree sharing addr's bit first, then the other one,
            # so addresses come out in roughly ascending XOR distance
            if (addr & self.bit) == 0:
                yield from self.left_child._query(addr)
                yield from self.right_child._query(addr)
            else:
                yield from self.right_child._query(addr)
                yield from self.left_child._query(addr)
        else:
            # within a bucket, sort by exact XOR distance
            yield from sorted(self.contents, key=lambda item: item ^ addr)


def make_tree(size):
    tree = PrefixTreeNode(L, bucket_size=50)
    for _ in range(size):
        tree.insert(random.randrange(0, POW_2))
    return tree
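A quick usage sketch, relying on the make_tree helper and the module-level constants above:

# build a tree of 10,000 random addresses, then look up the 16 closest to a random target
tree = make_tree(10000)
target = random.randrange(POW_2)
print(tree.query(target, 16))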
thm1.py
from scipy.stats import hypergeom, binom
import numpy as np


def thm_1(L, n, m, k, quiet=True, use_binom=False):
    """
    Computes E[R_{L,k}] using the method derived in Theorem 1.

    For debug output, specify `quiet=False`.

    To speed things up at the cost of a slight reduction in accuracy, specify
    `use_binom=True`. This parameter causes all hypergeometric distributions to
    be approximated by binomial distributions.
    """

    # check for the trivial case
    if n == 0:
        return 0

    # let's start by defining functions for some of the proof's identities

    def P_N(h):  # eqn 1.7
        if use_binom:
            return (1 - binom(n, 2**(h-L)).pmf(0)) / (1 - binom(n, 2**(h-L+1)).pmf(0))
        else:
            return (1 - hypergeom(2**L, n, 2**h).pmf(0)) / (1 - hypergeom(2**L, n, 2**(h+1)).pmf(0))

    def P_E(h):  # eqn 1.8
        return 1 - P_N(h)

    def P_A(h, a):  # eqn 1.9
        if use_binom:
            return binom(m, 2**(h-L)).pmf(a)
        else:
            return hypergeom(2**L, m, 2**h).pmf(a)

    # here's the main algorithm

    if not quiet:
        print(f"thm_1({L}, {n}, {m}, {k})")
        if use_binom:
            print("Using binomial optimization; results will not be 100% accurate.")

    # if not quiet:
    #     print()
    #     print("NEST probabilities for h in range(L):")
    #     print(repr(np.array([P_N(h) for h in range(L)])))
    #     print()

    # R_hk records the calculated expectations E[R_{h,k'} | N_{h+1}]
    # for 0 <= h < L and 0 <= k' <= k
    R_hk = np.zeros([L, k+1])

    # We'll start by using Equation 1.4 to populate R_hk[0, :].
    # Then once the h=0 case is finished we'll progress to h=1 and so on.

    # R_hk[0, 0] starts at its correct value: 0, since k=0
    if use_binom:
        R_hk[0, 1] = 1 - P_A(0, 1) * binom(n-1, 1/2**L).pmf(0) / 2
    else:
        R_hk[0, 1] = 1 - P_A(0, 1) * hypergeom(2**L, n-1, 1).pmf(0) / 2
    R_hk[0, 2:] = 1

    # now it's time to iteratively work through the h >= 1 cases
    for h in range(1, L):
        for k_prime in range(1, k+1):
            # applying Equation 1.2
            nonempty_expectation = R_hk[h-1, k_prime]
            # applying Equation 1.6
            empty_expectation = sum(
                P_A(h, a) * R_hk[h-1, k_prime-a]
                for a in range(k_prime)
            )
            # applying Equation 1.5
            R_hk[h, k_prime] = (P_N(h) * nonempty_expectation
                                + P_E(h) * empty_expectation)

    # if not quiet:
    #     print()
    #     print("Table of R_hk's values:")
    #     print(repr(R_hk))
    #     print()

    # applying Equation 1.3 to get the final result
    return R_hk[L-1, k]
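A quick sanity-check sketch with the parameters the scripts above use (L = 32, n = 15000 honest peers, k = 16), comparing the exact hypergeometric computation against the faster binomial approximation:

if __name__ == "__main__":
    print(thm_1(32, 15000, 100000, 16, quiet=False))                  # exact (hypergeometric)
    print(thm_1(32, 15000, 100000, 16, quiet=False, use_binom=True))  # faster approximation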