lemire · October 8, 2025 19:16
diff --git a/perfregress.py b/perfregress.py
 import subprocess
 import json
 import os
 import sys
 from datetime import datetime
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 from collections import defaultdict
 import tempfile

 # --- Script configuration -------------------------------------------------
 # Paths are relative; the script is expected to be launched from the
 # repository root.
 REPO_DIR = "."
 BUILD_DIR = "build"
 RESULTS_DIR = "benchmark_results"  # per-commit benchmark JSON files
 BENCHMARKS_DIR = os.path.join(BUILD_DIR, "microbenchmarks")  # holds the bench binary
 # Lower bound of the commit range handed to git.
 START_COMMIT = "d3b85c149b2d3c9e3343cfac860732a640486bba"

 def run_command(cmd, cwd=REPO_DIR, check=True):
    """Run *cmd* through the shell and return its stripped standard output.

    Args:
        cmd: Command line string; it is interpreted by the shell.
        cwd: Directory to run the command in (defaults to ``REPO_DIR``).
        check: When true, a non-zero exit status aborts the whole script.

    Returns:
        The captured stdout with leading/trailing whitespace removed.

    If ``check`` is true and the command fails, the failure is reported on
    stderr and the interpreter exits with status 1.
    """
    try:
        result = subprocess.run(
            cmd, shell=True, cwd=cwd, capture_output=True, text=True, check=check
        )
    except subprocess.CalledProcessError as e:
        # Some tools report their failure on stdout only; fall back to it so
        # the message is not blank when stderr is empty.
        details = (e.stderr or "").strip() or (e.stdout or "").strip()
        print(
            f"Error running '{cmd}' (exit code {e.returncode}): {details}",
            file=sys.stderr,
        )
        sys.exit(1)
    return result.stdout.strip()

 def get_commits(start_commit, end_branch="master"):
    """Return the commit hashes of ``start_commit..end_branch``, oldest first.

    The hashes come from ``git rev-list --reverse``, one per output line.
    An empty list is returned when git prints nothing.
    """
    rev_list = run_command(f"git rev-list --reverse {start_commit}..{end_branch}")
    if not rev_list:
        return []
    return rev_list.splitlines()

 def get_commit_timestamp(commit_hash):
    """Return the ``%ct`` timestamp git reports for *commit_hash* as an int.

    Falls back to 0 when git produces no output.
    """
    raw = run_command(f"git log -1 --format=%ct {commit_hash}")
    if raw:
        return int(raw)
    return 0

 def get_commit_datetime(commit_hash):
    """Return the commit's timestamp as a naive, local-time ``datetime``."""
    return datetime.fromtimestamp(get_commit_timestamp(commit_hash))

 # Multipliers converting a Google Benchmark "time_unit" value to nanoseconds.
 _NS_PER_UNIT = {"ns": 1.0, "us": 1e3, "ms": 1e6, "s": 1e9}


 def _safe_filename(name):
    """Return *name* with every char that is not alphanumeric, '.', '_' or '-' replaced by '_'."""
    return "".join(ch if ch.isalnum() or ch in "._-" else "_" for ch in name)


 def _extract_times(data, json_file):
    """Yield ``(name, time_ns)`` for each usable entry of a parsed result file.

    ``cpu_time`` is preferred over ``real_time``. Entries with no numeric
    time or an unrecognised ``time_unit`` are reported and skipped, so they
    never show up as a bogus 0 ns point in the plots.
    """
    for entry in data.get("benchmarks", []):
        name = entry.get("name", "")
        raw_time = entry.get("cpu_time", entry.get("real_time"))
        scale = _NS_PER_UNIT.get(entry.get("time_unit", "ns"))
        if isinstance(raw_time, (int, float)) and scale is not None:
            yield name, float(raw_time) * scale
        else:
            print(f"Warning: Invalid time for {name} in {json_file}")


 def _benchmark_commit(commit, benchmark_data):
    """Check out, build and benchmark *commit*, appending its timings to *benchmark_data*."""
    run_command(f"git checkout {commit}")
    run_command(f"cmake --build {BUILD_DIR} --target bench")

    # One git query serves both the plot's x value and the file name.
    commit_dt = get_commit_datetime(commit)
    stamp = commit_dt.strftime("%Y%m%d_%H%M%S")
    # The hash suffix keeps commits that share a timestamp from overwriting
    # each other's result file.
    json_file = os.path.join(RESULTS_DIR, f"results{stamp}_{commit[:12]}.json")

    run_command(
        f"./{BENCHMARKS_DIR}/bench --benchmark_out_format=json --benchmark_out={json_file}"
    )

    if not os.path.exists(json_file):
        print(f"Warning: JSON file {json_file} not created.")
        return

    with open(json_file, 'r') as f:
        data = json.load(f)
    for name, time_ns in _extract_times(data, json_file):
        benchmark_data[name].append((commit_dt, time_ns))


 def _plot_benchmark(bench_name, points):
    """Plot one benchmark's (datetime, time_ns) history to a PDF; return the file name."""
    points.sort(key=lambda point: point[0])  # chronological order
    dates, times = zip(*points)

    plt.figure(figsize=(10, 6))
    plt.plot(dates, times, marker='o', linestyle='-', markersize=4)
    plt.title(f"Benchmark: {bench_name}")
    plt.xlabel("Commit Date")
    plt.ylabel("Time (nanoseconds)")
    plt.ylim(bottom=0)  # Start y-axis at zero
    axis = plt.gca().xaxis
    axis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    axis.set_major_locator(mdates.WeekdayLocator())
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Benchmark names may contain '/' and similar characters that are not
    # valid inside a file name.
    pdf_name = f"{_safe_filename(bench_name)}_benchmark.pdf"
    plt.savefig(pdf_name, format='pdf')
    plt.close()
    return pdf_name


 def main():
    """Benchmark every commit after START_COMMIT and plot each benchmark's history.

    Steps: create the build/result directories, list the commits, configure
    CMake once at START_COMMIT, then build and run the ``bench`` target at
    each commit. The checkout that was active at start-up is restored
    afterwards (also on failure). Finally one PDF per benchmark with at
    least two data points is written to the current directory.

    Exits with status 1 when there are no commits to process.
    """
    os.makedirs(BUILD_DIR, exist_ok=True)
    os.makedirs(RESULTS_DIR, exist_ok=True)

    commits = get_commits(START_COMMIT)
    if not commits:
        print("No commits found between start and master.")
        sys.exit(1)

    print(f"Found {len(commits)} commits to process.")

    # Remember where the user was so the repository is not left on a
    # detached HEAD; "HEAD" means it already was detached, so keep the hash.
    original_ref = run_command("git rev-parse --abbrev-ref HEAD")
    if original_ref == "HEAD":
        original_ref = run_command("git rev-parse HEAD")

    benchmark_data = defaultdict(list)  # benchmark_name -> list of (datetime, time_ns)
    try:
        # Checkout start commit and initial cmake
        run_command(f"git checkout {START_COMMIT}")
        run_command(f"cmake -B {BUILD_DIR} -D ENABLE_ROARING_MICROBENCHMARKS=ON")

        for i, commit in enumerate(commits, start=1):
            print(f"Processing commit {i}/{len(commits)}: {commit}")
            _benchmark_commit(commit, benchmark_data)
    finally:
        # Best effort: do not mask the original failure if this one fails too.
        run_command(f"git checkout {original_ref}", check=False)

    # Generate plots
    pdf_names = []
    for bench_name, points in benchmark_data.items():
        if len(points) < 2:
            print(f"Skipping {bench_name}: insufficient data points.")
            continue
        pdf_name = _plot_benchmark(bench_name, points)
        pdf_names.append(pdf_name)
        print(f"Generated: {pdf_name}")

    if pdf_names:
        print("\nGenerated PDF files:")
        for name in pdf_names:
            print(f"- {name}")
    else:
        print("No plots generated.")


 if __name__ == "__main__":
    main()
	import subprocess
	import json
	import os
	import sys
	from datetime import datetime
	import matplotlib.pyplot as plt
	import matplotlib.dates as mdates
	from collections import defaultdict
	import tempfile

	# --- Script configuration -------------------------------------------------
	# Paths are relative; the script is expected to be launched from the
	# repository root.
	REPO_DIR = "."
	BUILD_DIR = "build"
	RESULTS_DIR = "benchmark_results"  # per-commit benchmark JSON files
	BENCHMARKS_DIR = os.path.join(BUILD_DIR, "microbenchmarks")  # holds the bench binary
	# Lower bound of the commit range handed to git.
	START_COMMIT = "d3b85c149b2d3c9e3343cfac860732a640486bba"

	def run_command(cmd, cwd=REPO_DIR, check=True):
	    """Run *cmd* through the shell and return its stripped standard output.

	    Args:
	        cmd: Command line string; it is interpreted by the shell.
	        cwd: Directory to run the command in (defaults to ``REPO_DIR``).
	        check: When true, a non-zero exit status aborts the whole script.

	    Returns:
	        The captured stdout with leading/trailing whitespace removed.

	    If ``check`` is true and the command fails, the failure is reported on
	    stderr and the interpreter exits with status 1.
	    """
	    try:
	        result = subprocess.run(
	            cmd, shell=True, cwd=cwd, capture_output=True, text=True, check=check
	        )
	    except subprocess.CalledProcessError as e:
	        # Some tools report their failure on stdout only; fall back to it so
	        # the message is not blank when stderr is empty.
	        details = (e.stderr or "").strip() or (e.stdout or "").strip()
	        print(
	            f"Error running '{cmd}' (exit code {e.returncode}): {details}",
	            file=sys.stderr,
	        )
	        sys.exit(1)
	    return result.stdout.strip()

	def get_commits(start_commit, end_branch="master"):
	    """Return the commit hashes of ``start_commit..end_branch``, oldest first.

	    The hashes come from ``git rev-list --reverse``, one per output line.
	    An empty list is returned when git prints nothing.
	    """
	    rev_list = run_command(f"git rev-list --reverse {start_commit}..{end_branch}")
	    if not rev_list:
	        return []
	    return rev_list.splitlines()

	def get_commit_timestamp(commit_hash):
	    """Return the ``%ct`` timestamp git reports for *commit_hash* as an int.

	    Falls back to 0 when git produces no output.
	    """
	    raw = run_command(f"git log -1 --format=%ct {commit_hash}")
	    if raw:
	        return int(raw)
	    return 0

	def get_commit_datetime(commit_hash):
	    """Return the commit's timestamp as a naive, local-time ``datetime``."""
	    return datetime.fromtimestamp(get_commit_timestamp(commit_hash))

	# Multipliers converting a Google Benchmark "time_unit" value to nanoseconds.
	_NS_PER_UNIT = {"ns": 1.0, "us": 1e3, "ms": 1e6, "s": 1e9}


	def _safe_filename(name):
	    """Return *name* with every char that is not alphanumeric, '.', '_' or '-' replaced by '_'."""
	    return "".join(ch if ch.isalnum() or ch in "._-" else "_" for ch in name)


	def _extract_times(data, json_file):
	    """Yield ``(name, time_ns)`` for each usable entry of a parsed result file.

	    ``cpu_time`` is preferred over ``real_time``. Entries with no numeric
	    time or an unrecognised ``time_unit`` are reported and skipped, so they
	    never show up as a bogus 0 ns point in the plots.
	    """
	    for entry in data.get("benchmarks", []):
	        name = entry.get("name", "")
	        raw_time = entry.get("cpu_time", entry.get("real_time"))
	        scale = _NS_PER_UNIT.get(entry.get("time_unit", "ns"))
	        if isinstance(raw_time, (int, float)) and scale is not None:
	            yield name, float(raw_time) * scale
	        else:
	            print(f"Warning: Invalid time for {name} in {json_file}")


	def _benchmark_commit(commit, benchmark_data):
	    """Check out, build and benchmark *commit*, appending its timings to *benchmark_data*."""
	    run_command(f"git checkout {commit}")
	    run_command(f"cmake --build {BUILD_DIR} --target bench")

	    # One git query serves both the plot's x value and the file name.
	    commit_dt = get_commit_datetime(commit)
	    stamp = commit_dt.strftime("%Y%m%d_%H%M%S")
	    # The hash suffix keeps commits that share a timestamp from overwriting
	    # each other's result file.
	    json_file = os.path.join(RESULTS_DIR, f"results{stamp}_{commit[:12]}.json")

	    run_command(
	        f"./{BENCHMARKS_DIR}/bench --benchmark_out_format=json --benchmark_out={json_file}"
	    )

	    if not os.path.exists(json_file):
	        print(f"Warning: JSON file {json_file} not created.")
	        return

	    with open(json_file, 'r') as f:
	        data = json.load(f)
	    for name, time_ns in _extract_times(data, json_file):
	        benchmark_data[name].append((commit_dt, time_ns))


	def _plot_benchmark(bench_name, points):
	    """Plot one benchmark's (datetime, time_ns) history to a PDF; return the file name."""
	    points.sort(key=lambda point: point[0])  # chronological order
	    dates, times = zip(*points)

	    plt.figure(figsize=(10, 6))
	    plt.plot(dates, times, marker='o', linestyle='-', markersize=4)
	    plt.title(f"Benchmark: {bench_name}")
	    plt.xlabel("Commit Date")
	    plt.ylabel("Time (nanoseconds)")
	    plt.ylim(bottom=0)  # Start y-axis at zero
	    axis = plt.gca().xaxis
	    axis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
	    axis.set_major_locator(mdates.WeekdayLocator())
	    plt.xticks(rotation=45)
	    plt.tight_layout()

	    # Benchmark names may contain '/' and similar characters that are not
	    # valid inside a file name.
	    pdf_name = f"{_safe_filename(bench_name)}_benchmark.pdf"
	    plt.savefig(pdf_name, format='pdf')
	    plt.close()
	    return pdf_name


	def main():
	    """Benchmark every commit after START_COMMIT and plot each benchmark's history.

	    Steps: create the build/result directories, list the commits, configure
	    CMake once at START_COMMIT, then build and run the ``bench`` target at
	    each commit. The checkout that was active at start-up is restored
	    afterwards (also on failure). Finally one PDF per benchmark with at
	    least two data points is written to the current directory.

	    Exits with status 1 when there are no commits to process.
	    """
	    os.makedirs(BUILD_DIR, exist_ok=True)
	    os.makedirs(RESULTS_DIR, exist_ok=True)

	    commits = get_commits(START_COMMIT)
	    if not commits:
	        print("No commits found between start and master.")
	        sys.exit(1)

	    print(f"Found {len(commits)} commits to process.")

	    # Remember where the user was so the repository is not left on a
	    # detached HEAD; "HEAD" means it already was detached, so keep the hash.
	    original_ref = run_command("git rev-parse --abbrev-ref HEAD")
	    if original_ref == "HEAD":
	        original_ref = run_command("git rev-parse HEAD")

	    benchmark_data = defaultdict(list)  # benchmark_name -> list of (datetime, time_ns)
	    try:
	        # Checkout start commit and initial cmake
	        run_command(f"git checkout {START_COMMIT}")
	        run_command(f"cmake -B {BUILD_DIR} -D ENABLE_ROARING_MICROBENCHMARKS=ON")

	        for i, commit in enumerate(commits, start=1):
	            print(f"Processing commit {i}/{len(commits)}: {commit}")
	            _benchmark_commit(commit, benchmark_data)
	    finally:
	        # Best effort: do not mask the original failure if this one fails too.
	        run_command(f"git checkout {original_ref}", check=False)

	    # Generate plots
	    pdf_names = []
	    for bench_name, points in benchmark_data.items():
	        if len(points) < 2:
	            print(f"Skipping {bench_name}: insufficient data points.")
	            continue
	        pdf_name = _plot_benchmark(bench_name, points)
	        pdf_names.append(pdf_name)
	        print(f"Generated: {pdf_name}")

	    if pdf_names:
	        print("\nGenerated PDF files:")
	        for name in pdf_names:
	            print(f"- {name}")
	    else:
	        print("No plots generated.")


	if __name__ == "__main__":
	    main()
No results found