Last active
February 4, 2026 21:01
-
-
Save julian-klode/7c7e68e0c03fe0a5f7eb4a1f9cdbb241 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Script to compare apt performance logs | |
| # | |
| # Copyright (C) 2026 Julian Andres Klode <jak@debian.org> | |
| # | |
| # SPDX-License-Identifier: GPL-2.0+ | |
| import sys | |
| import typing | |
| import pandas as pd | |
| try: | |
| import rich | |
| import rich.table | |
| except ImportError: | |
| rich = None # type: ignore[assignment] | |
# Derived rate metrics as (rate_name, numerator, denominator) triples.
# These need special handling when aggregating: a sum of per-sample
# ratios is meaningless, so totals are recomputed from the summed
# numerator and denominator (see extract_context/analyze_contexts).
RATES: list[tuple[str, str, str]] = [
    ("ipc", "instructions", "cpu_cycles"),
    ("cache_miss_rate", "cache_misses", "cache_references"),
    ("branch_miss_rate", "branch_misses", "branch_instructions"),
]
def read_file(file: str) -> pd.DataFrame:
    """Load one APT performance log (JSONL) into a data frame.

    Each line of the file is one JSON record of raw counter values; the
    derived rate columns from RATES (e.g. instructions per cycle) are
    computed and appended before the frame is returned.
    """
    with open(file) as stream:
        frame = pd.read_json(stream, lines=True)
    # Attach the derived ratio metrics column by column.
    for rate_name, numerator, denominator in RATES:
        frame[rate_name] = frame[numerator] / frame[denominator]
    return frame
def extract_context(df: pd.DataFrame, context: str) -> pd.DataFrame:
    """Extract and summarize a single context from an APT performance frame.

    Returns a frame indexed by counter name with "total", "mean", "std"
    and "cv (%)" (coefficient of variation) columns aggregated over all
    samples recorded for *context*.
    """
    # drop(columns=...) instead of `del` on a boolean-indexed slice:
    # deleting a column from the result of boolean indexing can trigger
    # SettingWithCopyWarning and relies on the slice being a copy.
    df = df[df["context"] == context].drop(columns="context")
    out = pd.DataFrame(
        {
            "total": df.sum(),
            "mean": df.mean(),
            "std": df.std(),
            "cv (%)": df.std() / df.mean() * 100,
        }
    )
    # Summing up the rates doesn't make much sense; recompute each total
    # rate from the summed numerator and denominator instead.
    for name, a, b in RATES:
        out.loc[name, "total"] = df[a].sum() / df[b].sum()
    return out
def analyze_contexts(contexts: list[pd.DataFrame]) -> pd.DataFrame:
    """Compare per-context summary frames gathered from several files.

    With a single frame it is returned unchanged.  With exactly two, a
    "change (%)" column compares the second file against the first.
    With more, mean/std/cv statistics are computed across the files.
    """
    # Single frame, nothing to compare
    if len(contexts) == 1:
        return contexts[0]
    out = pd.DataFrame()
    # One column of per-sample means per input file, numbered 0, 1, ...
    for i, ctx in enumerate(contexts):
        out[i] = ctx["mean"]
    # For rate rows, use the file's total rate rather than the mean of
    # per-sample rates (the mean would weight every sample equally).
    for i, ctx in enumerate(contexts):
        for name, _, _ in RATES:
            out.loc[name, i] = ctx.loc[name, "total"]
    if len(contexts) == 2:
        out["change (%)"] = out[1] / out[0] * 100 - 100
    else:
        # Calculate statistics across the files
        out["mean"] = out.agg("mean", axis=1)
        out["std"] = out.agg("std", axis=1)
        out["cv (%)"] = out["std"] / out["mean"] * 100
    return out
def print_table(out: pd.DataFrame, *, title: str) -> None:
    """Render the table, either with rich or with pure pandas.

    Values in per-file columns (integer column names) are colored green
    or red when they fall below or above one standard deviation from
    the mean.  Percentage columns get fixed-point formatting; "change"
    percentages are colored by sign (red = worse, green = better).
    """
    if rich is None:
        print(f"{title}:")
        print(out)
        print()
        return
    table = rich.table.Table(title=title)
    table.add_column(style="bold")
    for column in out.columns:
        table.add_column(str(column), justify="right")
    # Check if we have a mean and std column to determine "normal" ranges
    try:
        mean_col: int | None = typing.cast(int, out.columns.get_loc("mean"))
        std_col: int | None = typing.cast(int, out.columns.get_loc("std"))
    except KeyError:
        mean_col = std_col = None
    for index, rowa in zip(out.index, out.values):
        # BUG FIX: compare against None explicitly. The previous truthiness
        # checks treated a column index of 0 (mean as first column) and a
        # bound of 0.0 as "missing", silently disabling the highlighting.
        if mean_col is not None and std_col is not None:
            top: float | None = rowa[mean_col] + rowa[std_col]
            bot: float | None = rowa[mean_col] - rowa[std_col]
        else:
            top = bot = None
        # Convert row array to list for manipulation
        row = list(rowa)
        for i in range(len(row)):
            if (
                bot is not None
                and (row[i] < bot)
                and isinstance(out.columns[i], int)
            ):
                row[i] = f"[green]{row[i]:.5e}[/green]"
            elif (
                top is not None
                and (row[i] > top)
                and isinstance(out.columns[i], int)
            ):
                row[i] = f"[red]{row[i]:.5e}[/red]"
            elif isinstance(out.columns[i], str) and "%" in out.columns[i]:
                change = "change" in out.columns[i]
                if change and row[i] > 0:
                    row[i] = f"[red]{row[i]:.2f}[/red]"
                elif change and row[i] < 0:
                    row[i] = f"[green]{row[i]:.2f}[/green]"
                else:
                    row[i] = f"{row[i]:.2f}"
            else:
                row[i] = f"{row[i]:.5e}"
        # rich requires string cells; the index is a counter name anyway.
        table.add_row(str(index), *row)
    rich.print(table)
    print()
def main() -> None:
    """Read every log file given on the command line and print one
    comparison table per context found across the files."""
    results: dict[str, list[pd.DataFrame]] = {}
    for file in sys.argv[1:]:
        df = read_file(file)
        for context in df["context"].unique():
            # Group the per-file summaries by context name; setdefault
            # replaces the previous try/except-KeyError dance.
            results.setdefault(context, []).append(extract_context(df, context))
    for context, frames in results.items():
        print_table(analyze_contexts(frames), title=context)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * Performance measurements | |
| * | |
| * Copyright (C) 2026 Julian Andres Klode <jak@debian.org> | |
| * | |
| * SPDX-License-Identifier: GPL-2.0+ | |
| */ | |
| #pragma once | |
| #include <config.h> | |
| #if defined(APT_COMPILING_APT) && defined(__linux__) | |
| #include <apt-pkg/macros.h> | |
| #include <array> | |
| #include <cstdint> | |
| #include <cstdio> | |
| #include <cstdlib> | |
| #include <cstring> | |
| #include <sstream> | |
| #include <fcntl.h> | |
| #include <linux/perf_event.h> | |
| #include <sys/file.h> | |
| #include <sys/ioctl.h> | |
| #include <sys/syscall.h> | |
| #include <unistd.h> | |
namespace APT
{
/**
 * \brief A scoped object that will log the performance counters.
 *
 * Set the "APT_PERFORMANCE_LOG" environment variable to produce a
 * JSONL file with records for various contexts, such as the solver.
 */
class PerformanceContext
{
   // One perf event to monitor: the type/config pair passed to
   // perf_event_open() and the JSON key its value is logged under.
   struct measurement
   {
      uint32_t type;
      uint64_t config;
      const char *name;
   };
   // Fixed set of hardware and software counters to sample.
   static constexpr std::array<measurement, 9> measurements{
      measurement{PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, "instructions"},
      measurement{PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, "cpu_cycles"},
      measurement{PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, "ref_cpu_cycles"},
      measurement{PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, "cache_references"},
      measurement{PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, "cache_misses"},
      measurement{PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch_instructions"},
      measurement{PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, "branch_misses"},
      measurement{PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, "cpu_clock"},
      measurement{PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, "task_clock"},
   };
   /// Output filename (empty when logging is disabled)
   std::string out;
   /// Name of the context
   std::string name;
   /// FDs to communicate with the kernel, one per measurement; -1 marks
   /// counters the kernel refused to open.
   /// NOTE(review): deliberately left uninitialized when logging is
   /// disabled - the destructor returns before touching them under the
   /// same `out.empty()` condition.
   std::array<int, measurements.size()> fds;
   // Wrapper for the perf_event_open(2) system call (glibc provides no
   // wrapper for it).
   static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                               int cpu, int group_fd, unsigned long flags)
   {
      return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
   }
   // Open one counter for the calling process on any CPU, counting user
   // space only. Returns the fd, or -1 if the event is unavailable.
   static int open_perf_counter(uint32_t type, uint64_t config)
   {
      struct perf_event_attr pe;
      memset(&pe, 0, sizeof(struct perf_event_attr));
      pe.type = type;
      pe.size = sizeof(struct perf_event_attr);
      pe.config = config;
      pe.disabled = 1; // start stopped; enabled explicitly in the ctor
      pe.exclude_kernel = 1;
      pe.exclude_hv = 1;
      int fd = perf_event_open(&pe, 0, -1, -1, 0);
      return fd;
   }

   public:
   /// Construct a new scoped performance context and start counting.
   PerformanceContext(std::string name) : name(name)
   {
      // The if-scoped `out` (a char *) ends with this statement; the
      // `out.empty()` check below reads the member string instead.
      if (auto out = getenv("APT_PERFORMANCE_LOG"))
         this->out = out;
      if (likely(out.empty()))
         return;
      for (size_t i = 0; i < measurements.size(); ++i)
         fds[i] = open_perf_counter(measurements[i].type, measurements[i].config);
      // Reset and enable in two separate passes so the counters start as
      // close together as possible; unopened counters (-1) are skipped.
      // must_succeed comes from apt-pkg/macros.h - presumably it aborts
      // on a false argument (TODO confirm).
      for (auto fd : fds)
         must_succeed(fd == -1 || ioctl(fd, PERF_EVENT_IOC_RESET, 0) != -1);
      for (auto fd : fds)
         must_succeed(fd == -1 || ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) != -1);
   }
   /// Collect the results and store them in the specified performance file
   ~PerformanceContext()
   {
      if (likely(out.empty()))
         return;
      // Stop all counters first so the reads below don't measure this code.
      for (auto fd : fds)
         must_succeed(fd == -1 || ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) != -1);
      // NOTE(review): values[i] stays uninitialized for counters that
      // failed to open (fds[i] == -1) and is still written to the log
      // below - garbage values for unsupported counters.
      std::array<long long, measurements.size()> values;
      for (size_t i = 0; i < measurements.size(); ++i)
         must_succeed(fds[i] == -1 || read(fds[i], &values[i], sizeof(values[i])) == sizeof(values[i]));
      for (auto fd : fds)
         must_succeed(fd == -1 || close(fd) == 0);
      // Build one JSON record; the classic locale prevents any
      // locale-dependent digit grouping in the counter values.
      std::stringstream ss;
      ss.imbue(std::locale::classic());
      ss << "{\"context\": " << '"' << name << '"';
      for (size_t i = 0; i < measurements.size(); ++i)
      {
         ss << ", ";
         ss << '"' << measurements[i].name << '"' << ": " << values[i];
      }
      ss << "}\n";
      std::string entry = ss.str();
      // Atomically append a line to the JSONL file, allowing all users to read it
      int fd = open(out.c_str(), O_WRONLY | O_CREAT | O_APPEND, 0666);
      must_succeed(fd != -1);
      must_succeed(flock(fd, LOCK_EX) == 0);
      must_succeed(write(fd, entry.c_str(), entry.size()) == static_cast<ssize_t>(entry.size()));
      must_succeed(flock(fd, LOCK_UN) == 0);
      must_succeed(close(fd) == 0);
   }
};
} // namespace APT
| #else | |
| namespace APT | |
| { | |
| struct PerformanceContext | |
| { | |
| PerformanceContext(const char *) {}; | |
| ~PerformanceContext() {}; | |
| }; | |
| } // namespace APT | |
| #endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment