Test scripts for DHT size estimation
# this script samples the distributions of the k closest peers to randomly
# selected addresses and saves these to data files
import datetime
from multiprocessing import Process, Event
import os
import pprint
import random
import time

from test import PrefixTreeNode, L, k

upper_limit = 2**L - 1

def build_tree(size):
    # populate a prefix tree with `size` random L-bit node IDs
    tree = PrefixTreeNode(L, 50)
    for _ in range(size):
        tree.insert(random.randint(0, upper_limit))
    return tree

def run_tests(tree, num_lookups):
    # for each lookup, record the normalized XOR distances of the k
    # closest node IDs, grouped by rank
    samples = [[] for _ in range(k)]
    for _ in range(num_lookups):
        addr = random.randint(0, upper_limit)
        results = [node_id ^ addr for node_id in tree.query(addr, k)]
        for dist_list, this_dist in zip(samples, results):
            dist_list.append(this_dist / upper_limit)
    return samples

def worker(stop_event, sizes=(10, 1000, 100000), sample_size=50000):
    while not stop_event.is_set():
        print("Mark.")
        results = {size: [] for size in sizes}
        for size in sizes:
            tree = build_tree(size)
            samples = run_tests(tree, sample_size)
            results[size] += samples
        fname = "beta_data/" + datetime.datetime.now().isoformat()
        with open(fname, "w") as f:
            pprint.pprint(results, stream=f, width=200)
            f.write("\n")

if __name__ == "__main__":
    os.makedirs("beta_data", exist_ok=True)  # output directory must exist
    cpus = os.cpu_count() - 1  # leave one core free
    event = Event()
    workers = [Process(target=worker, args=(event,)) for _ in range(cpus)]
    for p in workers:
        p.start()
    print("Workers started.")
    try:
        while True:
            time.sleep(1)  # idle until interrupted, without spinning a core
    except KeyboardInterrupt:
        print("Wrapping up...")
        event.set()
        for p in workers:
            p.join()
            print("Worker joined.")
        print("Done.")
from matplotlib import pyplot as plt
import os
import numpy
import scipy.stats

def load_data():
    # merge the per-rank distance samples from every data file
    data = {size: [[] for _ in range(8)] for size in (10, 1000, 100000)}
    for dirent in os.scandir("beta_data"):
        with open("beta_data/" + dirent.name) as f:
            print("reading", dirent.name)
            results = eval(f.read())  # files are pprint'ed dicts
            for size, samples in results.items():
                for rank_list, rank_vals in zip(data[size], samples):
                    rank_list += rank_vals
    return data

def show_plot():
    rows = 2
    cols = 4
    fig, axs = plt.subplots(rows, cols)
    samples = load_data()[100000]
    for i, n_i_vals in enumerate(samples):
        buckets = numpy.histogram_bin_edges(n_i_vals, bins=50, range=(0, 0.00025))
        row = i // 4
        col = i % 4
        print(i, row, col)
        axs[row, col].hist(n_i_vals, buckets, facecolor='g', density=True, stacked=True)
        # overlay the Beta(i, n-i+1) density predicted for the i-th closest
        # of n=100000 uniformly distributed node IDs (i is one-indexed here)
        xs = numpy.linspace(0, 0.00025, 100)
        y_pdf = scipy.stats.beta(a=i+1, b=100000-(i+1)+1).pdf(xs)
        axs[row, col].plot(xs, y_pdf)
        axs[row, col].grid(False)
        axs[row, col].set_yticklabels([])
    plt.show()

if __name__ == "__main__":
    show_plot()
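The curve overlaid on each histogram above is the order-statistic density the samples should match: if n node IDs are uniform on [0, 1), the normalized XOR distance d_i to the i-th closest (one-indexed) is the i-th order statistic of n uniforms,

d_i \sim \operatorname{Beta}(i,\; n-i+1), \qquad
f_{d_i}(x) = \frac{n!}{(i-1)!\,(n-i)!}\, x^{i-1} (1-x)^{n-i}, \qquad
\mathbb{E}[d_i] = \frac{i}{n+1},

with n = 100000 here. The expectation i/(n+1) is exactly what both size estimators in the test module below invert.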
from matplotlib import pyplot as plt
import os

# window 790x634 (check this with xwininfo)
# plot sliders:
#   left   0.01
#   bottom 0.05
#   right  0.99
#   top    0.95
#   wspace 0.20
#   hspace 0.38

plt.rc('font', size=8)

network_sizes = (17, 1000, 250000)
sample_sizes = (10, 100, 2000)

lsq_data = {}
avg_data = {}
for n in network_sizes:
    for s in sample_sizes:
        lsq_data[n, s] = []
        avg_data[n, s] = []

# merge the estimates from every data file
for dirent in os.scandir('size_data'):
    with open('size_data/' + dirent.name) as f:
        print("reading " + dirent.name)
        contents = eval(f.read())  # files are pprint'ed dicts, simpler than pickling
        for n in network_sizes:
            for s in sample_sizes:
                lsq_data[n, s] += contents[n, s]['lsq']
                avg_data[n, s] += contents[n, s]['avg']

for n in network_sizes:
    for s in sample_sizes:
        avg_data[n, s].sort()
        lsq_data[n, s].sort()

#plt.hist(avg_data[17, 2000], bins=range(12, 22), facecolor='blue', alpha=0.4)
#plt.hist(lsq_data[17, 2000], bins=range(12, 22), facecolor='green', alpha=0.6)
#plt.show()

rows = 3
cols = 3
fig, axs = plt.subplots(rows, cols)
for i, n in enumerate(network_sizes):
    for j, s in enumerate(sample_sizes):
        print()
        print(n, s)
        # bucket and tick ranges tuned by hand for each (size, lookups) pair
        if n == 17:
            buckets = tuple(range(12, 23, 1))
            xticks = tuple(range(12, 23, 1))
        elif n == 1000:
            if s == 10:
                buckets = tuple(range(650, 1551, 2))
                xticks = tuple(range(700, 1551, 150))
            else:
                buckets = tuple(range(850, 1151, 2))
                xticks = tuple(range(850, 1151, 50))
        elif n == 250000:
            if s == 10:
                buckets = tuple(range(165000, 375001, 300))
                xticks = tuple(range(170000, 370001, 40000))
            else:
                buckets = tuple(range(210000, 290001, 300))
                xticks = tuple(range(210000, 290001, 20000))
        else:
            print(n)
            assert False
        print(n, s, sum(lsq_data[n, s]) / len(lsq_data[n, s]))
        axs[i, j].hist(lsq_data[n, s], buckets, facecolor='g', alpha=0.6, density=True)
        axs[i, j].hist(avg_data[n, s], buckets, facecolor='b', alpha=0.4, density=True)
        axs[i, j].grid(True)
        axs[i, j].set_title(f"{n} peers, {s} lookups")
        axs[i, j].set_xticks(xticks)
        axs[i, j].set_yticks([])
        if n == 250000:
            axs[i, j].set_xticklabels([str(num)[:-3] + 'k' for num in xticks])
plt.show()
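Both histogrammed estimators invert E[d_i] = i/(n+1) applied to the averaged per-rank distances d_i. The 'lsq' variant is a least-squares fit of the line d_i = i/(n+1) through the origin: writing beta = 1/(n+1) and minimizing \sum_i (d_i - \beta i)^2 gives \hat{\beta} = \sum_i i\,d_i / \sum_i i^2, hence

\hat{n}_{\text{lsq}} = \frac{\sum_{i=1}^{k} i^{2}}{\sum_{i=1}^{k} i\,d_{i}} - 1
                     = \frac{k(k+1)(2k+1)}{6 \sum_{i=1}^{k} i\,d_{i}} - 1,
\qquad
\hat{n}_{\text{avg}} = \frac{1}{k} \sum_{i=1}^{k} \left( \frac{i}{d_{i}} - 1 \right),

matching the lsq_estimate and avg_estimate lines in make_estimates below.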
from multiprocessing import Process, Event
import os
import datetime
import random
import itertools
import pprint
import time

L = 25  # bits per address
k = 8   # number of closest peers returned per lookup
POW_2 = 2**L

network_sizes = (17, 1000, 250000)
sample_sizes = (10, 100, 2000)

class PrefixTreeNode:
    # prefix tree implementation, with size-capped buckets of values at leaves
    def __init__(self, radix: int, bucket_size: int):
        self.bucket_size = bucket_size
        self.radix = radix
        self.bit = 2**radix
        self.contents = []
        self.left_child = None   # 0 branch
        self.right_child = None  # 1 branch

    def insert(self, addr: int):
        if self.contents is None:
            # internal node: descend according to this node's bit
            if (addr & self.bit) == 0:
                self.left_child.insert(addr)
            else:
                self.right_child.insert(addr)
        else:
            self.contents.append(addr)
            if len(self.contents) > self.bucket_size:
                if self.radix == 0:
                    raise Exception("tried to split leaf bucket. duplicate insert?")
                # split this leaf into two children and redistribute
                contents = self.contents
                self.contents = None
                self.left_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
                self.right_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
                for addr in contents:
                    self.insert(addr)

    def query(self, addr: int, k: int):
        # the k stored addresses closest to addr by XOR distance
        return tuple(itertools.islice(self._query(addr), k))

    def _query(self, addr):
        # yields all stored addresses in ascending XOR distance from addr:
        # the bit at this node's position dominates all lower bits, so the
        # subtree matching addr's bit is entirely closer than the other one
        if self.contents is None:
            if (addr & self.bit) == 0:
                yield from self.left_child._query(addr)
                yield from self.right_child._query(addr)
            else:
                yield from self.right_child._query(addr)
                yield from self.left_child._query(addr)
        else:
            yield from sorted(self.contents, key=lambda item: item ^ addr)

def make_tree(size):
    tree = PrefixTreeNode(L, bucket_size=50)
    for _ in range(size):
        tree.insert(random.randrange(0, POW_2))
    return tree

def make_estimates(net_size, num_lookups, num_repeats=1000):
    lsq_results = []
    avg_results = []
    tree = None
    for _ in range(num_repeats):
        # tiny networks are cheap to rebuild, so resample them every repeat
        if tree is None or net_size == 17:
            tree = make_tree(net_size)
        d_samples = []
        for _ in range(num_lookups):
            addr = random.randrange(0, POW_2)
            distances = [(d ^ addr) / POW_2 for d in tree.query(addr, k)]
            d_samples.append(distances)
        # average the i-th closest distance over all lookups
        final_distances = [sum(sample[i] for sample in d_samples) / num_lookups
                           for i in range(k)]
        # least-squares fit of d_i = i/(n+1), solved for n
        lsq_estimate = k*(k+1)*(2*k+1)/(6*sum(i*d for i, d in enumerate(final_distances, 1))) - 1
        lsq_results.append(lsq_estimate)
        # average of the per-rank estimates n = i/d_i - 1
        avg_estimate = sum(i/distance - 1 for i, distance in enumerate(final_distances, 1)) / len(final_distances)
        avg_results.append(avg_estimate)
    return {'lsq': lsq_results, 'avg': avg_results}

def worker(stop_event):
    while not stop_event.is_set():
        print("Mark.")
        results = {}
        for net_size in network_sizes:
            for sample_size in sample_sizes:
                key = (net_size, sample_size)
                results[key] = make_estimates(*key)
        fname = "size_data/" + datetime.datetime.now().isoformat()
        with open(fname, "w") as f:
            pprint.pprint(results, stream=f, width=200)
            f.write("\n")

if __name__ == "__main__":
    os.makedirs("size_data", exist_ok=True)  # output directory must exist
    cpus = os.cpu_count()
    event = Event()
    workers = [Process(target=worker, args=(event,)) for _ in range(cpus)]
    for p in workers:
        p.start()
    print("Workers started.")
    try:
        while True:
            time.sleep(1)  # idle until interrupted, without spinning a core
    except KeyboardInterrupt:
        print("Wrapping up...")
        event.set()
        for p in workers:
            p.join()
            print("Worker joined.")
        print("Done.")