Skip to content

Instantly share code, notes, and snippets.

@jmbr
Created March 24, 2026 15:46
Show Gist options
  • Select an option

  • Save jmbr/4620137005c3108225bd6ecc673ba792 to your computer and use it in GitHub Desktop.

Select an option

Save jmbr/4620137005c3108225bd6ecc673ba792 to your computer and use it in GitHub Desktop.
A simple benchmark comparison between two Git branches.
#!/usr/bin/env python3
"""String formatting benchmarks for paulimer.
Prints JSON {name: µs, ...} to stdout. Intended for use with benchmark.py.
Usage:
python benchmark.py bench_formatting.py --build "maturin develop --release -m paulimer/bindings/python/Cargo.toml"
"""
import json
import timeit

from paulimer import CliffordUnitary, DensePauli

# Fixture objects shared by the timed callables below.
pauli_300 = DensePauli("X" * 100 + "Y" * 100 + "Z" * 100)
pauli_1000 = DensePauli("X" * 1000)
cliff_10 = CliffordUnitary.identity(10)
cliff_50 = CliffordUnitary.identity(50)
cliff_10_repr = str(cliff_10)

# (label, zero-arg callable, iteration count) triples to time.
timed_cases = [
    ("DensePauli(300q) str", lambda: str(pauli_300), 100000),
    ("DensePauli(1000q) str", lambda: str(pauli_1000), 50000),
    ("DensePauli(300q) parse", lambda: DensePauli("X" * 300), 50000),
    ("Clifford(10q) str", lambda: str(cliff_10), 50000),
    ("Clifford(50q) str", lambda: str(cliff_50), 10000),
    ("Clifford(10q) parse", lambda: CliffordUnitary.from_string(cliff_10_repr), 50000),
]

# Mean time per call in microseconds, keyed by benchmark label.
results = {
    label: timeit.timeit(thunk, number=count) / count * 1e6
    for label, thunk, count in timed_cases
}
print(json.dumps(results))
#!/usr/bin/env python3
"""Compare benchmark performance between two git branches.
Interleaves A/B runs to cancel out thermal throttling and system load bias.
The benchmark script is copied to a temporary location before execution,
so it does not need to exist on both branches. Uncommitted changes are
stashed and restored automatically during branch switches.
The benchmark script must print a JSON object mapping benchmark names to
timings in microseconds, e.g.:
{"my_bench": 1.23, "other_bench": 4.56}
Usage:
python benchmark.py SCRIPT [--build CMD] [--baseline BRANCH] [--candidate BRANCH] [--rounds N]
Examples:
python benchmark.py bench_formatting.py --build "maturin develop --release"
python benchmark.py bench_formatting.py --build "cargo build --release" --baseline main --rounds 7
"""
import argparse
import json
import subprocess
import sys
import tempfile
from collections import defaultdict
from statistics import median
def current_branch():
    """Return the name of the git branch checked out in the current directory.

    Raises:
        subprocess.CalledProcessError: if git exits with a nonzero status.
    """
    completed = subprocess.run(
        ["git", "branch", "--show-current"],
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout.strip()
def _stash_ref():
    """Return the SHA of refs/stash, or None when the stash stack is empty."""
    probe = subprocess.run(
        ["git", "rev-parse", "-q", "--verify", "refs/stash"],
        capture_output=True,
        text=True,
    )
    return probe.stdout.strip() if probe.returncode == 0 else None


def run_one_round(branch, script, build_command):
    """Run the benchmark script once on *branch* and return its parsed output.

    Checks out *branch* (stashing any uncommitted changes first), optionally
    runs *build_command* through the shell, executes *script* with the current
    interpreter, then restores the original branch and stash state.

    Args:
        branch: Branch name to benchmark on.
        script: Path to the benchmark script (must print a JSON object).
        build_command: Shell command to run before benchmarking, or None/empty.

    Returns:
        The JSON object printed by the script, decoded into a dict.

    Raises:
        subprocess.CalledProcessError: if git, the build, or the script fails.
        json.JSONDecodeError: if the script's stdout is not valid JSON.
    """
    original = current_branch()
    need_checkout = branch != original
    stashed = False
    if need_checkout:
        # `git stash` creates no entry when the worktree is clean; remember
        # whether one was actually created so we never pop an unrelated,
        # pre-existing stash entry on the way out.
        ref_before = _stash_ref()
        subprocess.run(
            ["git", "stash", "--include-untracked"], capture_output=True, check=True
        )
        stashed = _stash_ref() != ref_before
        subprocess.run(["git", "checkout", branch], capture_output=True, check=True)
    try:
        if build_command:
            subprocess.run(
                build_command,
                shell=True,
                capture_output=True,
                check=True,
            )
        result = subprocess.run(
            [sys.executable, str(script)],
            capture_output=True,
            text=True,
            check=True,
        )
    finally:
        # Restore the original branch and stashed changes even when the build
        # or the benchmark run fails, so the repo is never left mid-switch.
        if need_checkout:
            subprocess.run(
                ["git", "checkout", original], capture_output=True, check=True
            )
            if stashed:
                subprocess.run(["git", "stash", "pop"], capture_output=True)
    return json.loads(result.stdout)
def main():
    """CLI entry point: parse arguments, run interleaved A/B rounds, report.

    Runs the benchmark script alternately on the baseline and candidate
    branches, collects per-benchmark timings, and prints a table of median
    timings with the percentage delta.
    """
    parser = argparse.ArgumentParser(
        description="Compare benchmark performance between two git branches.",
    )
    parser.add_argument(
        "script",
        type=argparse.FileType("r"),
        help="Benchmark script that prints JSON {name: µs, ...} to stdout",
    )
    parser.add_argument(
        "--build",
        default=None,
        help="Build command to run after each checkout (e.g., 'maturin develop --release')",
    )
    parser.add_argument(
        "--baseline", default="main", help="Baseline branch (default: main)"
    )
    parser.add_argument(
        "--candidate", default=None, help="Candidate branch (default: current)"
    )
    parser.add_argument(
        "--rounds", type=int, default=5, help="Interleaved A/B rounds (default: 5)"
    )
    args = parser.parse_args()
    candidate = args.candidate or current_branch()

    # Copy the script to a temp file so it exists regardless of which branch
    # is checked out.
    # NOTE(review): NamedTemporaryFile with delete=True cannot be reopened by a
    # child process on Windows; this assumes a POSIX host — confirm if needed.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=True) as tmp:
        tmp.write(args.script.read())
        args.script.close()
        tmp.flush()
        temp_script = tmp.name

        print(f"Script: {args.script.name}")
        print(f"Build: {args.build or '(none)'}")
        print(f"Baseline: {args.baseline}")
        print(f"Candidate: {candidate}")
        print(f"Rounds: {args.rounds} (interleaved A/B)")
        print()

        # label -> {benchmark name -> list of per-round timings in µs}
        branches = {"baseline": args.baseline, "candidate": candidate}
        samples = {label: defaultdict(list) for label in branches}
        for round_no in range(args.rounds):
            print(f"Round {round_no + 1}/{args.rounds}: ", end="", flush=True)
            # Interleave A/B within each round to cancel drift (thermal, load).
            for label in ("baseline", "candidate"):
                print(f"{label}... ", end="", flush=True)
                timings = run_one_round(branches[label], temp_script, args.build)
                for bench_name, micros in timings.items():
                    samples[label][bench_name].append(micros)
            print("done.")
        print()

        header = (
            f"{'Benchmark':<26} {'Baseline (µs)':>14} {'Candidate (µs)':>15} {'Delta':>8}"
        )
        print(header)
        print("-" * len(header))
        for bench_name in samples["baseline"]:
            base_med = median(samples["baseline"][bench_name])
            cand_med = median(samples["candidate"][bench_name])
            delta_pct = (cand_med - base_med) / base_med * 100
            sign = "+" if delta_pct > 0 else ""
            print(
                f"{bench_name:<26} {base_med:>13.2f} {cand_med:>14.2f} {sign}{delta_pct:>7.1f}%"
            )


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment