Skip to content

Instantly share code, notes, and snippets.

@mdboom
Created December 11, 2024 23:10
Show Gist options
  • Save mdboom/1709ad21760121ad2ea474e939ed87a6 to your computer and use it in GitHub Desktop.
Save mdboom/1709ad21760121ad2ea474e939ed87a6 to your computer and use it in GitHub Desktop.
from collections import defaultdict
import json
from operator import itemgetter
import math
from matplotlib import pyplot as plt
import numpy as np
# Names of benchmarks that are skipped by the analysis below.
excluded = {
    # Benchmarks where the mean execution count < 5
    "aiohttp",
    "asyncio_tcp",
    "asyncio_tcp_ssl",
    "bench_mp_pool",
    "bench_thread_pool",
    "deepcopy_reduce",
    "logging_silent",
    "pickle",
    "pickle_dict",
    "pickle_list",
    "unpack_sequence",
    "unpickle",
    "unpickle_list",
}
# Load the benchmark results. The context manager closes the file handle as
# soon as parsing is done instead of leaving it open for the rest of the run.
with open("results.json", encoding="utf-8") as results_file:
    contents = json.load(results_file)

# Per-benchmark aggregates, keyed by benchmark name:
#   data          -- array holding one mean per run that recorded values
#   totals        -- sum(values) * loops over all runs, plus the warmup terms
#   simple_totals -- sum(values) * loops over all runs, without warmups
data = {}
totals = {}
simple_totals = {}

for benchmark in contents["benchmarks"]:
    metadata = benchmark["metadata"]
    name = metadata["name"]
    if name in excluded:
        continue

    loops = metadata["loops"]
    row = []
    total = 0.0
    simple_total = 0.0
    for run in benchmark["runs"]:
        values = run.get("values", [])
        # We can (a) treat each sample independently
        #   row.extend(values)
        # ...or (b) take the mean of the samples within each process
        if values:
            row.append(np.mean(values))

        measured = np.sum(values) * loops
        total += measured
        simple_total += measured
        # Each warmup entry contributes the product of its first two elements.
        total += np.sum([x[0] * x[1] for x in run.get("warmups", [])])

    data[name] = np.array(row, dtype=np.float64)
    totals[name] = total
    simple_totals[name] = simple_total
# Normalized standard deviation (std / mean) of every benchmark, smallest
# first, drawn as a horizontal bar chart and written to stddev.png.
stddevs = sorted(
    ((name, np.std(values) / np.mean(values)) for name, values in data.items()),
    key=itemgetter(1),
)

fig, ax = plt.subplots(figsize=(8, len(stddevs) * 0.2), layout="constrained")
y_pos = np.arange(len(stddevs))
ax.barh(y_pos, [deviation for _, deviation in stddevs], align="center")
ax.set_yticks(y_pos, [name for name, _ in stddevs])
ax.set_xlabel("Normalized standard deviation")
fig.savefig("stddev.png")
def _required_samples(values, z_score=1.96, interval_width=0.01):
    """Estimate the sample count needed for a target confidence interval.

    Applies the standard sample-size formula for estimating a mean,
    ``n = 4 * z**2 * s**2 / W**2``, where ``s`` is the normalized standard
    deviation (``std / mean``) of *values*, ``z`` is the normal quantile for
    the confidence level (1.96 for 95%) and ``W`` is the total width of the
    confidence interval expressed as a fraction of the mean.

    The z-score is squared together with the deviation; squaring only the
    deviation under-estimates the requirement by a factor of ``z``.

    Returns the estimate rounded up with ``np.ceil``. The result may be
    non-finite when *values* is empty or its mean is zero.
    """
    relative_std = np.std(values) / np.mean(values)
    return np.ceil(4 * (z_score * relative_std) ** 2 / interval_width**2)


# One (name, required sample count, available sample count) tuple per
# benchmark, ordered by the required count.
samples = [(k, _required_samples(v), len(v)) for k, v in data.items()]
samples.sort(key=itemgetter(1))
def get_color(nsamples, length):
    """Pick the bar color for a benchmark in the required-samples chart.

    Returns "red" when *nsamples* is greater than *length*, "green" when
    *nsamples* equals 1, and "yellow" in every other case.
    """
    if nsamples > length:
        return "red"
    return "green" if nsamples == 1 else "yellow"
# Horizontal bar chart of the required sample count per benchmark, written to
# samples.png. Each bar is colored by get_color() from the required count and
# the number of samples actually available.
fig, ax = plt.subplots(figsize=(8, len(samples) * 0.2), layout="constrained")
y_pos = np.arange(len(samples))
ax.barh(
    y_pos,
    [required for _, required, _ in samples],
    align="center",
    color=[get_color(required, available) for _, required, available in samples],
)
ax.set_yticks(y_pos, [name for name, _, _ in samples])
# Bars longer than 70 are clipped at the right edge of the plot.
ax.set_xlim([0, 70])
# Label typo fixed: "certainly" -> "certainty".
ax.set_xlabel("# samples required for 95% certainty of 1% error")
ax.grid()
fig.savefig("samples.png")
# Re-key the sample list by benchmark name: name -> (required, available).
samples = {name: (required, available) for name, required, available in samples}

# For each benchmark, the share of its simple total that corresponds to the
# samples beyond the required count; zero when no samples could be dropped.
savings_map = {}
for name, spent in simple_totals.items():
    required, available = samples[name]
    savings_map[name] = (
        spent * (1.0 - (required / available)) if required < available else 0
    )

# Summed in insertion order, starting from 0.
savings = sum(savings_map.values())
# (name, total, total minus estimated saving) per benchmark, ordered by total.
totals = sorted(
    ((name, spent, spent - savings_map[name]) for name, spent in totals.items()),
    key=itemgetter(1),
)

full_bars = [entry[1] for entry in totals]
reduced_bars = [entry[2] for entry in totals]

# Two bar series on the same rows: the full totals first, then the reduced
# totals drawn over them. The chart is written to totals.png.
fig, ax = plt.subplots(figsize=(8, len(totals) * 0.2), layout="constrained")
y_pos = np.arange(len(totals))
ax.barh(y_pos, full_bars, align="center")
ax.barh(y_pos, reduced_bars, align="center")
ax.set_yticks(y_pos, [entry[0] for entry in totals])
# ax.set_xscale("log")
fig.savefig("totals.png")

print("Grand total:", np.sum(full_bars))
print("Potential savings", savings)
# Line plot of the per-run means collected for the "pylint" benchmark.
# The figure is created but not saved to a file here.
fig, ax = plt.subplots()
pylint_means = data["pylint"]
ax.plot(pylint_means)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment