mdboom · December 11, 2024 23:10
diff --git a/benchmark_times.py b/benchmark_times.py
 from collections import defaultdict
 import json
 from operator import itemgetter

 import math
 from matplotlib import pyplot as plt
 import numpy as np


 # Benchmarks left out of the analysis: those where the mean execution
 # count < 5.
 excluded = {
     "aiohttp",
     "asyncio_tcp",
     "asyncio_tcp_ssl",
     "bench_mp_pool",
     "bench_thread_pool",
     "deepcopy_reduce",
     "logging_silent",
     "pickle",
     "pickle_dict",
     "pickle_list",
     "unpack_sequence",
     "unpickle",
     "unpickle_list",
 }


 # Load the benchmark results. The file is read as a JSON document with a
 # top-level "benchmarks" list whose entries carry "metadata" and "runs".
 # The context manager closes the file handle as soon as parsing is done.
 with open("results.json", encoding="utf-8") as results_file:
     contents = json.load(results_file)

 # Per-benchmark accumulators, keyed by benchmark name:
 #   data          -- array holding one mean timing per run that has values
 #   totals        -- summed values (scaled by loops) plus the warmup term
 #   simple_totals -- summed values (scaled by loops) only
 data = {}
 totals = {}
 simple_totals = {}

 for benchmark in contents["benchmarks"]:
     metadata = benchmark["metadata"]
     name = metadata["name"]
     if name in excluded:
         continue

     loops = metadata["loops"]
     row = []
     total = 0.0
     simple_total = 0.0
     for run in benchmark["runs"]:
         values = run.get("values", [])
         # We can (a) treat each sample independently
         # row.extend(values)
         # ...or (b) take the mean of the samples within each process
         if values:
             row.append(np.mean(values))
         measured = np.sum(values) * loops
         total += measured
         simple_total += measured
         # Each warmup entry contributes the product of its first two
         # fields (presumably loop count x per-loop time -- confirm against
         # the results format).
         total += np.sum([x[0] * x[1] for x in run.get("warmups", [])])
     data[name] = np.array(row, dtype=np.float64)
     totals[name] = total
     simple_totals[name] = simple_total

 # Standard deviation divided by the mean for each benchmark's timings,
 # ordered from the smallest ratio to the largest.
 stddevs = sorted(
     (
         (bench, np.std(timings) / np.mean(timings))
         for bench, timings in data.items()
     ),
     key=itemgetter(1),
 )
 bench_labels = [entry[0] for entry in stddevs]
 rel_stddevs = [entry[1] for entry in stddevs]

 # One horizontal bar per benchmark; the figure height grows with the count.
 fig, ax = plt.subplots(figsize=(8, len(stddevs) * 0.2), layout="constrained")
 y_pos = np.arange(len(stddevs))
 ax.barh(y_pos, rel_stddevs, align="center")
 ax.set_yticks(y_pos, bench_labels)
 ax.set_xlabel("Normalized standard deviation")
 fig.savefig("stddev.png")


 # For every benchmark, build (name, required_samples, collected_samples).
 # required_samples is derived from the std/mean ratio of the timings using
 # the constants 4, 1.96 and 0.01, rounded up; the list is then ordered by
 # required_samples.
 # NOTE(review): 1.96 multiplies the *squared* ratio instead of being
 # squared together with it; the usual sample-size formula squares the
 # z-score too -- confirm which form is intended before relying on the
 # absolute numbers.
 samples = []
 for bench, timings in data.items():
     rel_stddev = np.std(timings) / np.mean(timings)
     required = np.ceil((4 * (1.96 * rel_stddev**2)) / (0.01**2))
     samples.append((bench, required, len(timings)))
 samples.sort(key=itemgetter(1))


 def get_color(nsamples, length):
     """Pick the bar color for one benchmark in the sample-count chart.

     Returns "red" when ``nsamples`` is greater than ``length``, "green"
     when ``nsamples`` equals 1 (and is not greater than ``length``), and
     "yellow" in every other case.
     """
     if nsamples > length:
         return "red"
     return "green" if nsamples == 1 else "yellow"


 # Horizontal bar chart of the required sample count per benchmark. Each bar
 # is colored by get_color() from the required count and the number of
 # samples actually collected.
 required_counts = [entry[1] for entry in samples]
 bar_colors = [get_color(entry[1], entry[2]) for entry in samples]

 fig, ax = plt.subplots(figsize=(8, len(samples) * 0.2), layout="constrained")
 y_pos = np.arange(len(samples))
 ax.barh(y_pos, required_counts, align="center", color=bar_colors)
 ax.set_yticks(y_pos, [entry[0] for entry in samples])
 # The x axis is pinned to 0-70, so longer bars run past the visible range.
 ax.set_xlim([0, 70])
 # Label typo fixed: "certainly" -> "certainty".
 ax.set_xlabel("# samples required for 95% certainty of 1% error")
 ax.grid()
 fig.savefig("samples.png")


 # Re-key the sample estimates by benchmark name:
 # name -> (required_samples, collected_samples).
 samples = {entry[0]: (entry[1], entry[2]) for entry in samples}

 # A benchmark that collected more samples than it requires could save the
 # matching fraction of its summed value time; the others save nothing.
 savings_map = {}
 savings = 0
 for name, total in simple_totals.items():
     required, collected = samples[name]
     saving = (
         total * (1.0 - (required / collected)) if required < collected else 0
     )
     savings_map[name] = saving
     savings += saving

 # (name, total time, total time minus the potential saving), ordered by
 # total time.
 totals = sorted(
     (
         (bench, spent, spent - savings_map[bench])
         for bench, spent in totals.items()
     ),
     key=itemgetter(1),
 )
 spent_times = [entry[1] for entry in totals]
 reduced_times = [entry[2] for entry in totals]

 # The reduced totals are drawn second, on top of the full totals.
 fig, ax = plt.subplots(figsize=(8, len(totals) * 0.2), layout="constrained")
 y_pos = np.arange(len(totals))
 ax.barh(y_pos, spent_times, align="center")
 ax.barh(y_pos, reduced_times, align="center")
 ax.set_yticks(y_pos, [entry[0] for entry in totals])
 # ax.set_xscale("log")
 fig.savefig("totals.png")

 print("Grand total:", np.sum(spent_times))
 print("Potential savings", savings)


 # Line plot of the timings stored under the "pylint" key of ``data``.
 # NOTE(review): no savefig()/show() call for this figure is visible here.
 fig, ax = plt.subplots()
 pylint_timings = data["pylint"]
 ax.plot(pylint_timings)
	from collections import defaultdict
	import json
	from operator import itemgetter

	import math
	from matplotlib import pyplot as plt
	import numpy as np


	# Benchmarks left out of the analysis: those where the mean execution
	# count < 5.
	excluded = {
	    "aiohttp",
	    "asyncio_tcp",
	    "asyncio_tcp_ssl",
	    "bench_mp_pool",
	    "bench_thread_pool",
	    "deepcopy_reduce",
	    "logging_silent",
	    "pickle",
	    "pickle_dict",
	    "pickle_list",
	    "unpack_sequence",
	    "unpickle",
	    "unpickle_list",
	}


	contents = json.load(open("results.json"))

	data = {}
	totals = {}
	simple_totals = {}

	# Fill the per-benchmark accumulators (data, totals, simple_totals) for
	# every benchmark whose name is not excluded. The loop and conditional
	# bodies are indented so that the nesting is valid Python.
	for benchmark in contents["benchmarks"]:
	    metadata = benchmark["metadata"]
	    name = metadata["name"]
	    if name in excluded:
	        continue

	    loops = metadata["loops"]
	    row = []
	    total = 0.0
	    simple_total = 0.0
	    for run in benchmark["runs"]:
	        values = run.get("values", [])
	        # We can (a) treat each sample independently
	        # row.extend(values)
	        # ...or (b) take the mean of the samples within each process
	        if values:
	            row.append(np.mean(values))
	        measured = np.sum(values) * loops
	        total += measured
	        simple_total += measured
	        # Each warmup entry contributes the product of its first two
	        # fields (presumably loop count x per-loop time -- confirm
	        # against the results format).
	        total += np.sum([x[0] * x[1] for x in run.get("warmups", [])])
	    # One mean timing per run that has values.
	    data[name] = np.array(row, dtype=np.float64)
	    # Summed values (scaled by loops) plus the warmup term.
	    totals[name] = total
	    # Summed values (scaled by loops) only.
	    simple_totals[name] = simple_total

	# Standard deviation divided by the mean for each benchmark's timings,
	# ordered from the smallest ratio to the largest.
	stddevs = sorted(
	    (
	        (bench, np.std(timings) / np.mean(timings))
	        for bench, timings in data.items()
	    ),
	    key=itemgetter(1),
	)
	bench_labels = [entry[0] for entry in stddevs]
	rel_stddevs = [entry[1] for entry in stddevs]

	# One horizontal bar per benchmark; the figure height grows with the count.
	fig, ax = plt.subplots(figsize=(8, len(stddevs) * 0.2), layout="constrained")
	y_pos = np.arange(len(stddevs))
	ax.barh(y_pos, rel_stddevs, align="center")
	ax.set_yticks(y_pos, bench_labels)
	ax.set_xlabel("Normalized standard deviation")
	fig.savefig("stddev.png")


	# For every benchmark, build (name, required_samples, collected_samples).
	# required_samples is derived from the std/mean ratio of the timings using
	# the constants 4, 1.96 and 0.01, rounded up; the list is then ordered by
	# required_samples. The two exponent operators (ratio ** 2 and 0.01 ** 2)
	# are written out explicitly; without them the expression does not parse.
	# NOTE(review): 1.96 multiplies the *squared* ratio instead of being
	# squared together with it; the usual sample-size formula squares the
	# z-score too -- confirm which form is intended.
	samples = [
	    (k, np.ceil((4 * (1.96 * (np.std(v) / np.mean(v)) ** 2)) / (0.01**2)), len(v))
	    for k, v in data.items()
	]
	samples.sort(key=itemgetter(1))


	def get_color(nsamples, length):
	    """Pick the bar color for one benchmark in the sample-count chart.

	    Returns "red" when ``nsamples`` is greater than ``length``, "green"
	    when ``nsamples`` equals 1 (and is not greater than ``length``), and
	    "yellow" in every other case.
	    """
	    if nsamples > length:
	        return "red"
	    elif nsamples == 1:
	        return "green"
	    else:
	        return "yellow"


	# Horizontal bar chart of the required sample count per benchmark. Each bar
	# is colored by get_color() from the required count and the number of
	# samples actually collected.
	fig, ax = plt.subplots(figsize=(8, len(samples) * 0.2), layout="constrained")
	y_pos = np.arange(len(samples))
	ax.barh(
	    y_pos,
	    [x[1] for x in samples],
	    align="center",
	    color=[get_color(x[1], x[2]) for x in samples],
	)
	ax.set_yticks(y_pos, [x[0] for x in samples])
	# The x axis is pinned to 0-70, so longer bars run past the visible range.
	ax.set_xlim([0, 70])
	# Label typo fixed: "certainly" -> "certainty".
	ax.set_xlabel("# samples required for 95% certainty of 1% error")
	ax.grid()
	fig.savefig("samples.png")


	# Estimate the time that could be saved per benchmark. The loop and
	# if/else bodies are indented so that the nesting is valid Python.
	savings_map = {}
	savings = 0
	# Re-key the sample estimates by benchmark name:
	# name -> (required_samples, collected_samples).
	samples = {x[0]: (x[1], x[2]) for x in samples}
	for name, total in simple_totals.items():
	    sample = samples[name]
	    if sample[0] < sample[1]:
	        # More samples were collected than required: the surplus fraction
	        # of the summed value time counts as a potential saving.
	        saving = total * (1.0 - (sample[0] / sample[1]))
	    else:
	        saving = 0
	    savings_map[name] = saving
	    savings += saving

	# (name, total time, total time minus the potential saving), ordered by
	# total time.
	totals = [(k, v, v - savings_map[k]) for k, v in totals.items()]
	totals.sort(key=itemgetter(1))

	# The reduced totals are drawn second, on top of the full totals.
	fig, ax = plt.subplots(figsize=(8, len(totals) * 0.2), layout="constrained")
	y_pos = np.arange(len(totals))
	ax.barh(y_pos, [x[1] for x in totals], align="center")
	ax.barh(y_pos, [x[2] for x in totals], align="center")
	ax.set_yticks(y_pos, [x[0] for x in totals])
	# ax.set_xscale("log")
	fig.savefig("totals.png")

	print("Grand total:", np.sum([x[1] for x in totals]))
	print("Potential savings", savings)


	# Line plot of the timings stored under the "pylint" key of ``data``.
	# NOTE(review): no savefig()/show() call for this figure is visible here.
	fig, ax = plt.subplots()
	pylint_timings = data["pylint"]
	ax.plot(pylint_timings)