Created
December 11, 2024 23:10
-
-
Save mdboom/1709ad21760121ad2ea474e939ed87a6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
import json | |
from operator import itemgetter | |
import math | |
from matplotlib import pyplot as plt | |
import numpy as np | |
# Benchmarks left out of the analysis: those where the mean execution
# count is < 5.
excluded = {
    "aiohttp",
    "asyncio_tcp",
    "asyncio_tcp_ssl",
    "bench_mp_pool",
    "bench_thread_pool",
    "deepcopy_reduce",
    "logging_silent",
    "pickle",
    "pickle_dict",
    "pickle_list",
    "unpack_sequence",
    "unpickle",
    "unpickle_list",
}
# Parse the benchmark results. The context manager closes the file handle as
# soon as parsing is done instead of leaving it open for the rest of the run.
with open("results.json", encoding="utf-8") as results_file:
    contents = json.load(results_file)

# Per-benchmark accumulators, keyed by benchmark name:
#   data          -> array with one mean value per run that reported values
#   totals        -> sum(values) * loops for every run, plus the warmup term
#   simple_totals -> sum(values) * loops for every run (warmups not included)
data = {}
totals = {}
simple_totals = {}

for benchmark in contents["benchmarks"]:
    metadata = benchmark["metadata"]
    name = metadata["name"]
    if name in excluded:
        continue

    loops = metadata["loops"]
    row = []
    total = 0.0
    simple_total = 0.0
    for run in benchmark["runs"]:
        values = run.get("values", [])
        # We can (a) treat each sample independently
        #     row.extend(values)
        # ...or (b) take the mean of the samples within each process.
        # Option (b) is the one in effect; runs without values add no point.
        if values:
            row.append(np.mean(values))

        measured = np.sum(values) * loops
        total += measured
        simple_total += measured
        # Each warmup entry is read as a pair whose two items are multiplied
        # together; presumably (loops, value) -- confirm against the producer
        # of results.json.
        total += np.sum([x[0] * x[1] for x in run.get("warmups", [])])

    data[name] = np.array(row, dtype=np.float64)
    totals[name] = total
    simple_totals[name] = simple_total
# Normalized standard deviation (std / mean) of the per-run means for every
# benchmark, ordered from the least to the most variable.
stddevs = sorted(
    ((bench, np.std(means) / np.mean(means)) for bench, means in data.items()),
    key=itemgetter(1),
)

# Horizontal bar chart with one row per benchmark; the figure height grows
# with the number of rows.
n_rows = len(stddevs)
fig, ax = plt.subplots(figsize=(8, n_rows * 0.2), layout="constrained")
y_pos = np.arange(n_rows)
bar_lengths = [spread for _, spread in stddevs]
bar_labels = [bench for bench, _ in stddevs]
ax.barh(y_pos, bar_lengths, align="center")
ax.set_yticks(y_pos, bar_labels)
ax.set_xlabel("Normalized standard deviation")
fig.savefig("stddev.png")
# For every benchmark, build a tuple of
#   (name, estimated number of samples required, number of samples collected).
# NOTE(review): the estimate is 4 * 1.96 * cv**2 / 0.01**2 -- the 1.96 factor
# is not squared and there is an extra factor of 4. Confirm this is the
# intended sample-size formula.
samples = []
for bench, means in data.items():
    cv = np.std(means) / np.mean(means)
    required = np.ceil((4 * (1.96 * cv**2)) / (0.01**2))
    samples.append((bench, required, len(means)))
samples.sort(key=itemgetter(1))
def get_color(nsamples, length):
    """Return the bar color for one benchmark.

    ``nsamples`` is the estimated number of samples required and ``length``
    is the number of samples actually available.

    * ``"red"``    -- more samples are required than are available
    * ``"green"``  -- exactly one sample is required
    * ``"yellow"`` -- every other case
    """
    if nsamples > length:
        return "red"
    return "green" if nsamples == 1 else "yellow"
# Horizontal bar chart of the estimated number of samples each benchmark
# requires, colored by get_color() to show how that compares with the number
# of samples actually collected.
fig, ax = plt.subplots(figsize=(8, len(samples) * 0.2), layout="constrained")
y_pos = np.arange(len(samples))
ax.barh(
    y_pos,
    [required for _, required, _ in samples],
    align="center",
    color=[get_color(required, collected) for _, required, collected in samples],
)
ax.set_yticks(y_pos, [bench for bench, _, _ in samples])
# Bars longer than the axis limit are clipped at 70.
ax.set_xlim([0, 70])
# Fixed typo in the axis label: "certainly" -> "certainty".
ax.set_xlabel("# samples required for 95% certainty of 1% error")
ax.grid()
fig.savefig("samples.png")
# Re-key the sample estimates by benchmark name:
#   name -> (samples required, samples collected)
samples = {bench: (required, collected) for bench, required, collected in samples}

# Estimate how much of each benchmark's measured total could be skipped if
# only the required number of samples were collected. Benchmarks that need at
# least as many samples as they already have contribute no saving.
savings_map = {}
savings = 0
for bench, measured_total in simple_totals.items():
    required, collected = samples[bench]
    savings_map[bench] = (
        measured_total * (1.0 - (required / collected))
        if required < collected
        else 0
    )
    savings += savings_map[bench]
# Turn the totals mapping into (name, total, total minus potential saving)
# tuples ordered by ascending total.
totals = sorted(
    (
        (bench, amount, amount - savings_map[bench])
        for bench, amount in totals.items()
    ),
    key=itemgetter(1),
)

# Two bar series drawn at the same positions: the full total first, then the
# reduced total on top of it.
n_rows = len(totals)
fig, ax = plt.subplots(figsize=(8, n_rows * 0.2), layout="constrained")
y_pos = np.arange(n_rows)
full_amounts = [entry[1] for entry in totals]
reduced_amounts = [entry[2] for entry in totals]
ax.barh(y_pos, full_amounts, align="center")
ax.barh(y_pos, reduced_amounts, align="center")
ax.set_yticks(y_pos, [entry[0] for entry in totals])
# A log-scaled x axis was left disabled here:
# ax.set_xscale("log")
fig.savefig("totals.png")

print("Grand total:", np.sum([entry[1] for entry in totals]))
print("Potential savings", savings)
# Line plot of the per-run means recorded for the "pylint" benchmark.
# NOTE(review): no savefig()/show() call is visible after this, so the figure
# is presumably only displayed in an interactive session -- confirm.
fig, ax = plt.subplots()
pylint_means = data["pylint"]
ax.plot(pylint_means)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment