Test scripts for DHT size estimation (forked from wootfish/beta_test.py). The scripts below sample the XOR distances from random target addresses to their k closest peers, check that the normalized distance of the i-th closest peer follows a Beta(i, n-i+1) distribution, and compare two estimators ("lsq" and "avg") of the network size n built from those distances.
# ---- beta_test.py ----
# this script samples the distributions of the k closest peers to randomly
# selected addresses and saves these to data files

import datetime
from multiprocessing import Process, Event
import os
import pprint
import random

from test import PrefixTreeNode, L, k

upper_limit = 2**L - 1


def build_tree(size):
    tree = PrefixTreeNode(L, 50)
    for _ in range(size):
        tree.insert(random.randint(0, upper_limit))
    return tree


def run_tests(tree, num_lookups):
    samples = [[] for _ in range(k)]
    for n in range(num_lookups):
        addr = random.randint(0, upper_limit)
        results = [node_id ^ addr for node_id in tree.query(addr, k)]
        for dist_list, this_dist in zip(samples, results):
            dist_list.append(this_dist / upper_limit)
    return samples


def worker(stop_event, sizes=(10, 1000, 100000), sample_size=50000):
    while not stop_event.is_set():
        print("Mark.")
        results = {size: [] for size in sizes}
        for size in sizes:
            tree = build_tree(size)
            samples = run_tests(tree, sample_size)
            results[size] += samples
        fname = "beta_data/" + datetime.datetime.now().isoformat()
        with open(fname, "w") as f:
            pprint.pprint(results, stream=f, width=200)
            f.write("\n")


if __name__ == "__main__":
    cpus = os.cpu_count() - 1
    event = Event()
    workers = [Process(target=worker, args=(event,)) for _ in range(cpus)]
    for p in workers:
        p.start()
    print("Workers started.")
    try:
        while True:
            pass
    except KeyboardInterrupt:
        print("Wrapping up...")
        event.set()
        for p in workers:
            p.join()
            print("Worker joined.")
        print("Done.")
# ---- plot script: per-rank distance histograms vs. the Beta pdf ----
from matplotlib import pyplot as plt
import os
import numpy
import scipy.stats


def load_data():
    # one list per rank (8 = k) for each network size
    data = {size: [[] for _ in range(8)] for size in (10, 1000, 100000)}
    for dirent in os.scandir("beta_data"):
        with open('beta_data/' + dirent.name) as f:
            print("reading", dirent.name)
            results = eval(f.read())
        for size, samples in results.items():
            for rank_list, rank_vals in zip(data[size], samples):
                rank_list += rank_vals
    return data


def show_plot():
    rows = 2
    cols = 4
    fig, axs = plt.subplots(rows, cols)
    samples = load_data()[100000]
    for i, n_i_vals in enumerate(samples):
        buckets = numpy.histogram_bin_edges(n_i_vals, bins=50, range=(0, 0.00025))
        row = i // 4
        col = i % 4
        print(i, row, col)
        axs[row, col].hist(n_i_vals, buckets, facecolor='g', density=True, stacked=True)
        xs = numpy.linspace(0, 0.00025, 100)
        y_pdf = scipy.stats.beta(a=i+1, b=100000-(i+1)+1).pdf(xs)
        axs[row, col].plot(xs, y_pdf)
        axs[row, col].grid(False)
        axs[row, col].set_yticklabels([])
    plt.show()


if __name__ == "__main__":
    show_plot()
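
# ---- goodness-of-fit sketch (editor's addition, not part of the original gist) ----
# Instead of eyeballing the histograms against the Beta pdf, a
# Kolmogorov-Smirnov test quantifies the fit. A minimal sketch, assuming it
# runs alongside load_data() above and that beta_data/ holds sample files.
import scipy.stats

def ks_check(n=100000):
    samples = load_data()[n]
    for i, n_i_vals in enumerate(samples):
        dist = scipy.stats.beta(a=i+1, b=n-(i+1)+1)
        stat, p = scipy.stats.kstest(n_i_vals, dist.cdf)
        print(f"rank {i+1}: KS statistic {stat:.4f}, p-value {p:.3f}")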
# ---- plot script: estimator histograms by network size and lookup count ----
from matplotlib import pyplot as plt
import numpy
import scipy.stats
from scipy.special import gamma
import os

# window 790x634 (check this with xwininfo)
# plot sliders:
#   left   0.01
#   bottom 0.05
#   right  0.99
#   top    0.95
#   wspace 0.20
#   hspace 0.38

plt.rc('font', size=8)

network_sizes = (17, 1000, 250000)
sample_sizes = (10, 100, 2000)

lsq_data = {}
avg_data = {}
for n in network_sizes:
    for s in sample_sizes:
        lsq_data[n, s] = []
        avg_data[n, s] = []

for dirent in os.scandir('size_data'):
    with open('size_data/' + dirent.name) as f:
        print("reading " + dirent.name)
        contents = eval(f.read())  # fuck a pickle
    for n in network_sizes:
        for s in sample_sizes:
            lsq_data[n, s] += contents[n, s]['lsq']
            avg_data[n, s] += contents[n, s]['avg']

for n in network_sizes:
    for s in sample_sizes:
        avg_data[n, s].sort()
        lsq_data[n, s].sort()

#plt.hist(avg_data[17, 2000], bins=range(12, 22), facecolor='blue', alpha=0.4)
#plt.hist(lsq_data[17, 2000], bins=range(12, 22), facecolor='green', alpha=0.6)
#plt.show()

rows = 3
cols = 3
fig, axs = plt.subplots(rows, cols)
for i, n in enumerate(network_sizes):
    for j, s in enumerate(sample_sizes):
        print()
        print(n, s)
        if n == 17:
            buckets = tuple(range(12, 23, 1))
            xticks = tuple(range(12, 23, 1))
        elif n == 1000:
            if s == 10:
                buckets = tuple(range(650, 1551, 2))
                xticks = tuple(range(700, 1551, 150))
            else:
                buckets = tuple(range(850, 1151, 2))
                xticks = tuple(range(850, 1151, 50))
        elif n == 250000:
            if s == 10:
                buckets = tuple(range(165000, 375001, 300))
                xticks = tuple(range(170000, 370001, 40000))
            else:
                buckets = tuple(range(210000, 290001, 300))
                xticks = tuple(range(210000, 290001, 20000))
        else:
            print(n)
            assert False
        print(n, s, sum(lsq_data[n, s]) / len(lsq_data[n, s]))
        axs[i, j].hist(lsq_data[n, s], buckets, facecolor='g', alpha=0.6, density=True)
        axs[i, j].hist(avg_data[n, s], buckets, facecolor='b', alpha=0.4, density=True)
        axs[i, j].grid(True)
        axs[i, j].set_title(f"{n} peers, {s} lookups")
        axs[i, j].set_xticks(xticks)
        axs[i, j].set_yticks([])
        if n == 250000:
            axs[i, j].set_xticklabels([str(num)[:-3] + 'k' for num in xticks])
plt.show()
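
# ---- summary-statistics sketch (editor's addition, not part of the original gist) ----
# The histograms compare the two estimators visually; this sketch prints the
# mean, relative bias, and relative spread per (network size, lookups) pair,
# using the lsq_data/avg_data dicts built above. Assumes size_data/ held at
# least two samples per pair.
import statistics

def summarize(data, label):
    for n in network_sizes:
        for s in sample_sizes:
            vals = data[n, s]
            mean = statistics.fmean(vals)
            rel_sd = statistics.stdev(vals) / mean
            print(f"{label} n={n} s={s}: mean {mean:.1f} "
                  f"(bias {mean/n - 1:+.1%}), rel. sd {rel_sd:.1%}")

# summarize(lsq_data, 'lsq'); summarize(avg_data, 'avg')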
# ---- test.py ----
from multiprocessing import Process, Event
import os
import datetime
import random
import json
import heapq
import itertools
import pprint

L = 25  # address length in bits
k = 8   # number of closest peers returned per lookup
POW_2 = 2**L
network_sizes = (17, 1000, 250000)
sample_sizes = (10, 100, 2000)


class PrefixTreeNode:
    # prefix tree implementation, with size-capped buckets of values at leaves
    def __init__(self, radix: int, bucket_size: int):
        self.bucket_size = bucket_size
        self.radix = radix
        self.bit = 2**radix      # the address bit this node branches on
        self.contents = []
        self.left_child = None   # 0 branch
        self.right_child = None  # 1 branch

    def insert(self, addr: int):
        if self.contents is None:
            # interior node: route to the child matching this node's bit
            if (addr & self.bit) == 0:
                self.left_child.insert(addr)
            else:
                self.right_child.insert(addr)
        else:
            # leaf node: append, and split once the bucket overflows
            self.contents.append(addr)
            if len(self.contents) > self.bucket_size:
                if self.radix == 0:
                    raise Exception("tried to split leaf bucket. duplicate insert?")
                contents = self.contents
                self.contents = None
                self.left_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
                self.right_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
                for addr in contents:
                    self.insert(addr)

    def query(self, addr: int, k: int):
        return tuple(itertools.islice(self._query(addr), k))

    def _query(self, addr):
        # yields node IDs in increasing XOR distance from addr: the subtree
        # on addr's side of this node's bit is exhausted before the other
        # one, and leaf buckets are yielded sorted by distance
        if self.contents is None:
            if (addr & self.bit) == 0:
                yield from self.left_child._query(addr)
                yield from self.right_child._query(addr)
            else:
                yield from self.right_child._query(addr)
                yield from self.left_child._query(addr)
        else:
            yield from sorted(self.contents, key=lambda item: item ^ addr)
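
# Usage sketch (editor's addition, not part of the original gist): a tree
# over 4-bit addresses with tiny buckets, showing that query() returns node
# IDs in XOR-distance order. The numbers are illustrative.
#
#     t = PrefixTreeNode(radix=4, bucket_size=2)
#     for node_id in (0b0001, 0b0010, 0b0111, 0b1000, 0b1110):
#         t.insert(node_id)
#     t.query(0b0000, 3)  # -> (1, 2, 7), sorted by node_id ^ 0b0000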
lsq_results = {n: {m: [] for m in sample_sizes} for n in network_sizes}
avg_results = {n: {m: [] for m in sample_sizes} for n in network_sizes}


def make_tree(size):
    tree = PrefixTreeNode(L, bucket_size=50)
    for _ in range(size):
        tree.insert(random.randrange(0, POW_2))
    return tree


def make_estimates(net_size, num_lookups, num_repeats=1000):
    lsq_results = []
    avg_results = []
    tree = None
    for _ in range(num_repeats):
        # build the tree once and reuse it for the larger networks; rebuild
        # it on every repeat for the tiny 17-node network, where it's cheap
        if tree is None or net_size == 17:
            tree = make_tree(net_size)
        d_samples = []
        for _ in range(num_lookups):
            addr = random.randrange(0, POW_2)
            distances = [(d ^ addr) / POW_2 for d in tree.query(addr, k)]
            d_samples.append(distances)
        # average the i-th closest normalized distance over all lookups
        final_distances = [sum(sample[i] for sample in d_samples) / num_lookups
                           for i in range(k)]
        lsq_estimate = k*(k+1)*(2*k+1)/(6*sum(i*d for i, d in enumerate(final_distances, 1))) - 1
        lsq_results.append(lsq_estimate)
        avg_estimate = sum(i/distance - 1 for i, distance in enumerate(final_distances, 1)) / len(final_distances)
        avg_results.append(avg_estimate)
    return {'lsq': lsq_results, 'avg': avg_results}
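
# Derivation note (editor's addition). With uniformly random node IDs, the
# normalized distance d_i of the i-th closest peer to a random address is
# approximately Beta(i, n-i+1), so E[d_i] = i/(n+1). Fitting d_i ~ b*i by
# least squares gives b = sum(i*d_i)/sum(i^2), and since sum(i^2) for
# i = 1..k equals k(k+1)(2k+1)/6, setting b = 1/(n+1) yields the lsq
# estimate used above:
#
#     n ~= k(k+1)(2k+1) / (6 * sum(i*d_i)) - 1
#
# The avg estimate instead inverts each rank separately, n_i = i/d_i - 1,
# and averages the k per-rank estimates.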
def worker(stop_event):
    while not stop_event.is_set():
        print("Mark.")
        results = {}
        for net_size in network_sizes:
            for sample_size in sample_sizes:
                key = (net_size, sample_size)
                results[key] = make_estimates(*key)
        fname = "size_data/" + datetime.datetime.now().isoformat()
        with open(fname, "w") as f:
            pprint.pprint(results, stream=f, width=200)
            f.write("\n")


if __name__ == "__main__":
    cpus = os.cpu_count()
    event = Event()
    workers = [Process(target=worker, args=(event,)) for _ in range(cpus)]
    for p in workers:
        p.start()
    print("Workers started.")
    try:
        while True:
            pass
    except KeyboardInterrupt:
        print("Wrapping up...")
        event.set()
        for p in workers:
            p.join()
            print("Worker joined.")
        print("Done.")