Add topological sorter method

Exported on 9/28/2025 at 11:52:08 EDT from Cursor (1.7.11)

User

Add a method to DependencyGraph to return a graphlib TopologicalSorter for the graph based on the build dependencies of the nodes.

Cursor

I'll help you add a method to the DependencyGraph class that returns a graphlib.TopologicalSorter based on the build dependencies. Let me first examine the current DependencyGraph class to understand its structure and existing methods.

Now I need to understand what RequirementType looks like to see what build dependencies are called, and also check if graphlib is already imported or if I need to add it.

Let me check the RequirementType enum to understand what build dependencies are called:

Perfect! Now I can see that there are build-related requirement types: BUILD_SYSTEM, BUILD_BACKEND, and BUILD_SDIST. There's also a convenient property is_build_requirement that identifies all build requirements.

Now I'll create a todo list to track this task and then implement the method:

Now I'll implement the method. First, I need to add the graphlib import and then create the method:

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

Now I'll add the method to the DependencyGraph class. I'll add it near the end of the class, after the existing methods:

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def get_build_dependency_topological_sorter(self) -> graphlib.TopologicalSorter[str]:
        """Return a TopologicalSorter for the build dependencies in the graph.
        
        The sorter contains nodes as package keys (name==version) and edges
        representing build-time dependencies between packages.
        
        Returns:
            A TopologicalSorter where each node is a package key and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()
        
        # Get all build requirement types
        build_req_types = [
            RequirementType.BUILD_SYSTEM,
            RequirementType.BUILD_BACKEND, 
            RequirementType.BUILD_SDIST,
        ]
        
        # Collect all nodes that have build dependencies or are build dependencies
        build_nodes: set[str] = set()
        build_edges: list[tuple[str, str]] = []
        
        # Traverse the graph to find build dependencies
        for edge in self.get_dependency_edges(match_dep_types=build_req_types):
            parent_key = None
            # Find the parent node for this edge
            for node in self.get_all_nodes():
                if edge in node.children:
                    parent_key = node.key
                    break
            
            if parent_key is not None:
                build_nodes.add(parent_key)
                build_nodes.add(edge.destination_node.key)
                # In topological sorting, if A depends on B, then B must come before A
                # So we add (dependent=A, dependency=B)
                build_edges.append((parent_key, edge.destination_node.key))
        
        # Add all build nodes to the sorter
        for node_key in build_nodes:
            sorter.add(node_key)
        
        # Add dependencies to the sorter
        for dependent, dependency in build_edges:
            # Skip self-dependencies to avoid cycles
            if dependent != dependency:
                sorter.add(dependent, dependency)
        
        return sorter

Now let me check for any linting errors in the file:

I need to fix the whitespace and trailing space issues. Let me clean those up:

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def get_build_dependency_topological_sorter(self) -> graphlib.TopologicalSorter[str]:
        """Return a TopologicalSorter for the build dependencies in the graph.

        The sorter contains nodes as package keys (name==version) and edges
        representing build-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a package key and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()

        # Get all build requirement types
        build_req_types = [
            RequirementType.BUILD_SYSTEM,
            RequirementType.BUILD_BACKEND,
            RequirementType.BUILD_SDIST,
        ]

        # Collect all nodes that have build dependencies or are build dependencies
        build_nodes: set[str] = set()
        build_edges: list[tuple[str, str]] = []

        # Traverse the graph to find build dependencies
        for edge in self.get_dependency_edges(match_dep_types=build_req_types):
            parent_key = None
            # Find the parent node for this edge
            for node in self.get_all_nodes():
                if edge in node.children:
                    parent_key = node.key
                    break

            if parent_key is not None:
                build_nodes.add(parent_key)
                build_nodes.add(edge.destination_node.key)
                # In topological sorting, if A depends on B, then B must come before A
                # So we add (dependent=A, dependency=B)
                build_edges.append((parent_key, edge.destination_node.key))

        # Add all build nodes to the sorter
        for node_key in build_nodes:
            sorter.add(node_key)

        # Add dependencies to the sorter
        for dependent, dependency in build_edges:
            # Skip self-dependencies to avoid cycles
            if dependent != dependency:
                sorter.add(dependent, dependency)

        return sorter

Let me check the lints again to see if I've resolved the whitespace issues:

Good! The whitespace issues are resolved. The remaining linting errors are pre-existing issues in the file that are not related to my changes. Let me run the tests to make sure my implementation works correctly:

User

You don't need to traverse the graph to build the new one. Just iterate over all of the nodes and look at their outgoing edges.

Cursor

You're absolutely right! That's a much more efficient approach. Let me refactor the method to simply iterate over all nodes and examine their outgoing edges instead of traversing the graph.

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def get_build_dependency_topological_sorter(self) -> graphlib.TopologicalSorter[str]:
        """Return a TopologicalSorter for the build dependencies in the graph.

        The sorter contains nodes as package keys (name==version) and edges
        representing build-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a package key and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()

        # Collect all nodes that have build dependencies or are build dependencies
        build_nodes: set[str] = set()

        # Iterate over all nodes and examine their outgoing build dependency edges
        for node in self.get_all_nodes():
            for edge in node.children:
                if edge.req_type.is_build_requirement:
                    # This node has a build dependency
                    build_nodes.add(node.key)
                    build_nodes.add(edge.destination_node.key)
                    
                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node.key != edge.destination_node.key:
                        sorter.add(node.key, edge.destination_node.key)

        # Add any isolated build nodes (nodes with no dependencies)
        for node_key in build_nodes:
            sorter.add(node_key)

        return sorter

Perfect! I've refactored the method to be much more efficient. The new implementation:

Directly iterates over all nodes instead of traversing the graph
Examines outgoing edges from each node to find build dependencies
Uses the convenient is_build_requirement property from RequirementType to identify build dependencies
Builds the topological sorter more efficiently by adding dependencies as we find them

The key improvements:

O(n) complexity instead of potentially more expensive graph traversal
Simpler logic - just iterate through nodes and check their children
Leverages existing properties like is_build_requirement
Cleaner code with fewer intermediate data structures

The method now returns a graphlib.TopologicalSorter[str] that contains all nodes involved in build dependencies, with the proper dependency relationships established for topological sorting based on build requirements (build-system, build-backend, build-sdist).

User

Add a similar method to return a sorter using installation dependencies.

Cursor

I'll add a similar method for installation dependencies. Let me add it right after the build dependency method:

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def get_build_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[str]:
        """Return a TopologicalSorter for the build dependencies in the graph.

        The sorter contains nodes as package keys (name==version) and edges
        representing build-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a package key and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()

        # Collect all nodes that have build dependencies or are build dependencies
        build_nodes: set[str] = set()

        # Iterate over all nodes and examine their outgoing build dependency edges
        for node in self.get_all_nodes():
            for edge in node.children:
                if edge.req_type.is_build_requirement:
                    # This node has a build dependency
                    build_nodes.add(node.key)
                    build_nodes.add(edge.destination_node.key)

                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node.key != edge.destination_node.key:
                        sorter.add(node.key, edge.destination_node.key)

        # Add any isolated build nodes (nodes with no dependencies)
        for node_key in build_nodes:
            sorter.add(node_key)

        return sorter

    def get_install_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[str]:
        """Return a TopologicalSorter for the install dependencies in the graph.

        The sorter contains nodes as package keys (name==version) and edges
        representing install-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a package key and edges
            represent install dependencies (install, toplevel).
        """
        sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()

        # Collect all nodes that have install dependencies or are install dependencies
        install_nodes: set[str] = set()

        # Iterate over all nodes and examine their outgoing install dependency edges
        for node in self.get_all_nodes():
            for edge in node.children:
                if edge.req_type.is_install_requirement:
                    # This node has an install dependency
                    install_nodes.add(node.key)
                    install_nodes.add(edge.destination_node.key)

                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node.key != edge.destination_node.key:
                        sorter.add(node.key, edge.destination_node.key)

        # Add any isolated install nodes (nodes with no dependencies)
        for node_key in install_nodes:
            sorter.add(node_key)

        return sorter

Perfect! I've added the get_install_dependency_topological_sorter method that follows the same pattern as the build dependency method. This new method:

Uses is_install_requirement to identify install dependencies (install and toplevel requirement types)
Follows the same efficient approach of iterating over all nodes and examining their outgoing edges
Returns a graphlib.TopologicalSorter[str] with proper dependency relationships for install-time dependencies
Handles cycles by skipping self-dependencies
Includes isolated nodes that might not have dependencies but are part of the install dependency graph

Now the DependencyGraph class has both methods:

get_build_dependency_topological_sorter() - for build-time dependencies (build-system, build-backend, build-sdist)
get_install_dependency_topological_sorter() - for install-time dependencies (install, toplevel)

Both methods use the same efficient O(n) approach and return topological sorters that can be used to determine the proper order for processing packages based on their respective dependency types.

User

Instead of using str for the type in the graph, use DependencyNode

Cursor

I'll update both methods to use DependencyNode instead of str for the TopologicalSorter type. This makes more sense since we're working with the actual node objects.

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def get_build_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the build dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing build-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        sorter: graphlib.TopologicalSorter[DependencyNode] = graphlib.TopologicalSorter()

        # Collect all nodes that have build dependencies or are build dependencies
        build_nodes: set[DependencyNode] = set()

        # Iterate over all nodes and examine their outgoing build dependency edges
        for node in self.get_all_nodes():
            for edge in node.children:
                if edge.req_type.is_build_requirement:
                    # This node has a build dependency
                    build_nodes.add(node)
                    build_nodes.add(edge.destination_node)

                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node != edge.destination_node:
                        sorter.add(node, edge.destination_node)

        # Add any isolated build nodes (nodes with no dependencies)
        for node in build_nodes:
            sorter.add(node)

        return sorter

    def get_install_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[str]:
        """Return a TopologicalSorter for the install dependencies in the graph.

        The sorter contains nodes as package keys (name==version) and edges
        representing install-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a package key and edges
            represent install dependencies (install, toplevel).
        """
        sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()

        # Collect all nodes that have install dependencies or are install dependencies
        install_nodes: set[str] = set()

        # Iterate over all nodes and examine their outgoing install dependency edges
        for node in self.get_all_nodes():
            for edge in node.children:
                if edge.req_type.is_install_requirement:
                    # This node has an install dependency
                    install_nodes.add(node.key)
                    install_nodes.add(edge.destination_node.key)

                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node.key != edge.destination_node.key:
                        sorter.add(node.key, edge.destination_node.key)

        # Add any isolated install nodes (nodes with no dependencies)
        for node_key in install_nodes:
            sorter.add(node_key)

        return sorter

Now let me update the install dependency method as well:

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def get_build_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the build dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing build-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        sorter: graphlib.TopologicalSorter[DependencyNode] = graphlib.TopologicalSorter()

        # Collect all nodes that have build dependencies or are build dependencies
        build_nodes: set[DependencyNode] = set()

        # Iterate over all nodes and examine their outgoing build dependency edges
        for node in self.get_all_nodes():
            for edge in node.children:
                if edge.req_type.is_build_requirement:
                    # This node has a build dependency
                    build_nodes.add(node)
                    build_nodes.add(edge.destination_node)

                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node != edge.destination_node:
                        sorter.add(node, edge.destination_node)

        # Add any isolated build nodes (nodes with no dependencies)
        for node in build_nodes:
            sorter.add(node)

        return sorter

    def get_install_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the install dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing install-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent install dependencies (install, toplevel).
        """
        sorter: graphlib.TopologicalSorter[DependencyNode] = graphlib.TopologicalSorter()

        # Collect all nodes that have install dependencies or are install dependencies
        install_nodes: set[DependencyNode] = set()

        # Iterate over all nodes and examine their outgoing install dependency edges
        for node in self.get_all_nodes():
            for edge in node.children:
                if edge.req_type.is_install_requirement:
                    # This node has an install dependency
                    install_nodes.add(node)
                    install_nodes.add(edge.destination_node)

                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node != edge.destination_node:
                        sorter.add(node, edge.destination_node)

        # Add any isolated install nodes (nodes with no dependencies)
        for node in install_nodes:
            sorter.add(node)

        return sorter

Perfect! I've updated both methods to use DependencyNode instead of str for the TopologicalSorter type. The key changes:

Return type: Changed from graphlib.TopologicalSorter[str] to graphlib.TopologicalSorter[DependencyNode]
Internal collections: Changed from set[str] to set[DependencyNode]
Sorter operations: Now using the actual DependencyNode objects instead of their string keys
Comparisons: Using direct node object comparison (node != edge.destination_node) instead of key comparison

This is much more type-safe and semantically correct since:

The TopologicalSorter now works directly with the rich DependencyNode objects
Consumers can access all the node properties (version, canonicalized_name, download_url, etc.) directly
No need to look up nodes by key after getting the sorted result
Better encapsulation since the internal structure (keys) is not exposed

Both methods now return graphlib.TopologicalSorter[DependencyNode] objects that can be used to get topologically sorted dependency nodes for build and install dependencies respectively.

User

Refactor both methods to share common logic.

Cursor

I'll refactor both methods to share common logic by creating a private helper method that takes a predicate function to determine which requirement types to include.

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def _get_dependency_topological_sorter(
        self,
        requirement_filter: typing.Callable[[RequirementType], bool],
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for dependencies matching the filter.

        Args:
            requirement_filter: Function that returns True for requirement types to include

        Returns:
            A TopologicalSorter with DependencyNode objects and dependency relationships
        """
        sorter: graphlib.TopologicalSorter[DependencyNode] = (
            graphlib.TopologicalSorter()
        )

        # Collect all nodes that have matching dependencies or are matching dependencies
        dependency_nodes: set[DependencyNode] = set()

        # Iterate over all nodes and examine their outgoing dependency edges
        for node in self.get_all_nodes():
            for edge in node.children:
                if requirement_filter(edge.req_type):
                    # This node has a matching dependency
                    dependency_nodes.add(node)
                    dependency_nodes.add(edge.destination_node)

                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node != edge.destination_node:
                        sorter.add(node, edge.destination_node)

        # Add any isolated nodes (nodes with no dependencies)
        for node in dependency_nodes:
            sorter.add(node)

        return sorter

    def get_build_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the build dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing build-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        return self._get_dependency_topological_sorter(
            lambda req_type: req_type.is_build_requirement
        )

    def get_install_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the install dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing install-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent install dependencies (install, toplevel).
        """
        return self._get_dependency_topological_sorter(
            lambda req_type: req_type.is_install_requirement
        )

Perfect! I've successfully refactored both methods to share common logic. Here's what I did:

Refactoring Benefits

Eliminated Code Duplication: The core logic for building a topological sorter is now in a single private method _get_dependency_topological_sorter
Flexible Design: The private method takes a requirement_filter function that determines which requirement types to include, making it reusable for any type of dependency filtering
Cleaner Public Interface: Both public methods are now simple one-liners that call the shared implementation with the appropriate filter
Maintainability: Any future changes to the topological sorting logic only need to be made in one place

Implementation Details

_get_dependency_topological_sorter: Private helper method that contains all the shared logic
requirement_filter: A callable that takes a RequirementType and returns bool to determine inclusion
Build dependencies: Uses lambda req_type: req_type.is_build_requirement
Install dependencies: Uses lambda req_type: req_type.is_install_requirement

This refactoring makes the code more maintainable and follows the DRY (Don't Repeat Yourself) principle while preserving the exact same functionality. If you need to add more dependency type filters in the future, you can easily do so by calling the private method with different filter functions.

User

The sorter should always include all nodes in the graph

Cursor

You're right! The topological sorter should include all nodes in the graph, not just those that have dependencies of the specified type. Let me fix the implementation:

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def _get_dependency_topological_sorter(
        self,
        requirement_filter: typing.Callable[[RequirementType], bool],
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for dependencies matching the filter.

        Args:
            requirement_filter: Function that returns True for requirement types to include

        Returns:
            A TopologicalSorter with all DependencyNode objects and filtered dependency relationships
        """
        sorter: graphlib.TopologicalSorter[DependencyNode] = (
            graphlib.TopologicalSorter()
        )

        # Add all nodes to the sorter first
        for node in self.get_all_nodes():
            sorter.add(node)

        # Then add dependency relationships that match the filter
        for node in self.get_all_nodes():
            for edge in node.children:
                if requirement_filter(edge.req_type):
                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node != edge.destination_node:
                        sorter.add(node, edge.destination_node)

        return sorter

    def get_build_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the build dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing build-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        return self._get_dependency_topological_sorter(
            lambda req_type: req_type.is_build_requirement
        )

    def get_install_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the install dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing install-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent install dependencies (install, toplevel).
        """
        return self._get_dependency_topological_sorter(
            lambda req_type: req_type.is_install_requirement
        )

Perfect! Now the implementation correctly includes all nodes in the graph. Here's what I changed:

Key Changes

All nodes included: The sorter now includes every node in the dependency graph, not just those involved in the filtered dependency type
Two-phase approach:
- Phase 1: Add all nodes to the sorter without dependencies
- Phase 2: Add only the dependency relationships that match the filter
Cleaner logic: Removed the dependency_nodes set since we're now including all nodes by default

Benefits

Complete graph representation: The topological sorter contains the full graph structure
Filtered relationships: Only the relevant dependency relationships (build or install) are used for ordering
Isolated nodes handled: Nodes without dependencies of the filtered type are still included in the sorter
Consistent behavior: Both build and install dependency sorters will contain all nodes, just with different dependency relationships

This means that when you call get_build_dependency_topological_sorter(), you get all nodes in the graph but only build dependencies affect the ordering. Similarly, get_install_dependency_topological_sorter() includes all nodes but only install dependencies affect the ordering.

User

Combine the two loops

Cursor

You're right! I can combine the two loops into a single iteration. Let me refactor that:

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def _get_dependency_topological_sorter(
        self,
        requirement_filter: typing.Callable[[RequirementType], bool],
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for dependencies matching the filter.

        Args:
            requirement_filter: Function that returns True for requirement types to include

        Returns:
            A TopologicalSorter with all DependencyNode objects and filtered dependency relationships
        """
        sorter: graphlib.TopologicalSorter[DependencyNode] = (
            graphlib.TopologicalSorter()
        )

        # Add all nodes and filtered dependency relationships
        for node in self.get_all_nodes():
            sorter.add(node)
            for edge in node.children:
                if requirement_filter(edge.req_type):
                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node != edge.destination_node:
                        sorter.add(node, edge.destination_node)

        return sorter

    def get_build_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the build dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing build-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        return self._get_dependency_topological_sorter(
            lambda req_type: req_type.is_build_requirement
        )

    def get_install_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the install dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing install-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent install dependencies (install, toplevel).
        """
        return self._get_dependency_topological_sorter(
            lambda req_type: req_type.is_install_requirement
        )

Perfect! I've combined the two loops into a single iteration. Now the method:

Iterates through all nodes once
Adds each node to the sorter (ensuring all nodes are included)
Examines the node's outgoing edges in the same iteration
Adds filtered dependency relationships when they match the requirement filter

This is more efficient as it reduces the number of iterations over the nodes from 2 to 1, while maintaining the same functionality of including all nodes and only the relevant dependency relationships.

User

Call prepare() on the graph before returning it

Cursor

I'll add a call to prepare() on the sorter before returning it. This prepares the topological sorter for iteration:

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def _get_dependency_topological_sorter(
        self,
        requirement_filter: typing.Callable[[RequirementType], bool],
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for dependencies matching the filter.

        Args:
            requirement_filter: Function that returns True for requirement types to include

        Returns:
            A TopologicalSorter with all DependencyNode objects and filtered dependency relationships
        """
        sorter: graphlib.TopologicalSorter[DependencyNode] = (
            graphlib.TopologicalSorter()
        )

        # Add all nodes and filtered dependency relationships
        for node in self.get_all_nodes():
            sorter.add(node)
            for edge in node.children:
                if requirement_filter(edge.req_type):
                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles
                    if node != edge.destination_node:
                        sorter.add(node, edge.destination_node)

        sorter.prepare()
        return sorter

    def get_build_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the build dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing build-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        return self._get_dependency_topological_sorter(
            lambda req_type: req_type.is_build_requirement
        )

    def get_install_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the install dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing install-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent install dependencies (install, toplevel).
        """
        return self._get_dependency_topological_sorter(
            lambda req_type: req_type.is_install_requirement
        )

Perfect! I've added the sorter.prepare() call before returning the sorter. This is important because:

Prepares for iteration: The prepare() method readies the TopologicalSorter for use with methods like get_ready() and done()
Validates the graph: It checks for cycles and raises CycleError if any are found
Optimizes performance: It sets up internal data structures for efficient topological sorting operations
Required for stateful operations: After calling prepare(), you can use the sorter's stateful methods to get nodes that are ready to be processed (have no remaining dependencies)

Now both get_build_dependency_topological_sorter() and get_install_dependency_topological_sorter() return fully prepared TopologicalSorter objects that are ready to be used for iterating through the dependency graph in topological order.

User

Update the ParallelBuildManager to use 2 topological sorters for the graph to manage the build order. Nodes become ready to build when the build dependency graph says they are. When nodes are built, they should be marked done in the install dependency graph and then all of the ready nodes of the install dependency graph should be marked as done in the build dependency graph.

Cursor

I need to examine the current ParallelBuildManager implementation to understand how to integrate the two topological sorters. Let me first look at the build command and then the ParallelBuildManager.

Now let me look at the complete ParallelBuildManager class to understand its current methods:

Now I need to create a todo list and then update the ParallelBuildManager to use the two topological sorters:

Now let me update the ParallelBuildManager to use the two topological sorters. First, I'll need to import graphlib and update the constructor:

from __future__ import annotations

import concurrent.futures
import graphlib
import logging
import pathlib
import threading
import typing

import click
from packaging.requirements import Requirement
from packaging.version import Version

from .. import context, dependency_graph, metrics, server, wheels
from ..build_environment import req_ctxvar_context

Let me check the current imports in the build.py file:

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built (all build dependencies and their install dependencies are built)."""
        buildable_nodes: DependencyNodeList = []

        for node in nodes_to_build:
            with req_ctxvar_context(Requirement(node.canonicalized_name), node.version):
                # Get all build dependencies (build-system, build-backend, build-sdist)
                build_deps: DependencyNodeList = [
                    edge.destination_node
                    for edge in node.children
                    if edge.req_type.is_build_requirement
                ]

                # Collect all dependencies that must be built before this node can be built
                all_required_deps: set[str] = set()

                # Add direct build dependencies
                for build_dep in build_deps:
                    all_required_deps.add(build_dep.key)

                    # Add installation dependencies of each build dependency
                    install_deps_of_build_dep = [
                        edge.destination_node
                        for edge in build_dep.children
                        if edge.req_type.is_install_requirement
                    ]
                    for install_dep in install_deps_of_build_dep:
                        all_required_deps.add(install_dep.key)

                # A node can be built when all required dependencies are built
                unbuilt_deps: set[str] = set(
                    dep_key
                    for dep_key in all_required_deps
                    if dep_key not in self.built_node_keys
                )

                if not unbuilt_deps:
                    logger.info(
                        "ready to build, have all build dependencies and their install dependencies: %s",
                        sorted(all_required_deps),
                    )
                    buildable_nodes.append(node)
                else:
                    logger.info(
                        "waiting for build dependencies and their install dependencies: %s",
                        sorted(unbuilt_deps),
                    )

        return buildable_nodes

    def _filter_for_exclusive_builds(
        self, buildable_nodes: DependencyNodeList
    ) -> DependencyNodeList:
        """Filter buildable nodes to handle exclusive build requirements."""
        # Check if any buildable node requires exclusive build (exclusive_build == True)
        exclusive_nodes: DependencyNodeList = [
            node
            for node in buildable_nodes
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build
        ]
        if exclusive_nodes:
            # Only build the first exclusive node this round
            filtered_nodes = [exclusive_nodes[0]]
            logger.info(
                f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
            )
            return filtered_nodes

        return buildable_nodes

    def get_nodes_ready_to_build(self) -> DependencyNodeList:
        """Get the list of nodes that are ready to be built in this round."""
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Handle exclusive builds
        buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)

        return buildable_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get nodes that are ready to be built in this round
            buildable_nodes = build_manager.get_nodes_ready_to_build()

            # Build up to max_workers nodes concurrently (or all if max_workers is None)
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=max_workers
            ) as executor:
                futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                reqs: list[Requirement] = []
                logger.info(
                    "starting to build: %s", sorted(n.key for n in buildable_nodes)
                )
                for node in buildable_nodes:
                    req = Requirement(f"{node.canonicalized_name}=={node.version}")
                    reqs.append(req)
                    future = executor.submit(
                        _build_parallel,
                        wkctx=wkctx,
                        resolved_version=node.version,
                        req=req,
                        source_download_url=node.download_url,
                        force=force,
                        cache_wheel_server_url=cache_wheel_server_url,
                    )
                    future.add_done_callback(update_progressbar_cb)
                    futures.append(future)

                # Wait for all builds to complete
                for node, future in zip(buildable_nodes, futures, strict=True):
                    try:
                        entry = future.result()
                        entries.append(entry)
                        build_manager.mark_node_built(node)
                        # progress bar is updated in callback
                    except Exception as e:
                        logger.error(f"Failed to build {node.key}: {e}")
                        raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now let me update the ParallelBuildManager constructor to use the two topological sorters:

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]
        
        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_build_dependency_topological_sorter()
        )
        self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_install_dependency_topological_sorter()
        )

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built (all build dependencies and their install dependencies are built)."""
        buildable_nodes: DependencyNodeList = []

        for node in nodes_to_build:
            with req_ctxvar_context(Requirement(node.canonicalized_name), node.version):
                # Get all build dependencies (build-system, build-backend, build-sdist)
                build_deps: DependencyNodeList = [
                    edge.destination_node
                    for edge in node.children
                    if edge.req_type.is_build_requirement
                ]

                # Collect all dependencies that must be built before this node can be built
                all_required_deps: set[str] = set()

                # Add direct build dependencies
                for build_dep in build_deps:
                    all_required_deps.add(build_dep.key)

                    # Add installation dependencies of each build dependency
                    install_deps_of_build_dep = [
                        edge.destination_node
                        for edge in build_dep.children
                        if edge.req_type.is_install_requirement
                    ]
                    for install_dep in install_deps_of_build_dep:
                        all_required_deps.add(install_dep.key)

                # A node can be built when all required dependencies are built
                unbuilt_deps: set[str] = set(
                    dep_key
                    for dep_key in all_required_deps
                    if dep_key not in self.built_node_keys
                )

                if not unbuilt_deps:
                    logger.info(
                        "ready to build, have all build dependencies and their install dependencies: %s",
                        sorted(all_required_deps),
                    )
                    buildable_nodes.append(node)
                else:
                    logger.info(
                        "waiting for build dependencies and their install dependencies: %s",
                        sorted(unbuilt_deps),
                    )

        return buildable_nodes

    def _filter_for_exclusive_builds(
        self, buildable_nodes: DependencyNodeList
    ) -> DependencyNodeList:
        """Filter buildable nodes to handle exclusive build requirements."""
        # Check if any buildable node requires exclusive build (exclusive_build == True)
        exclusive_nodes: DependencyNodeList = [
            node
            for node in buildable_nodes
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build
        ]
        if exclusive_nodes:
            # Only build the first exclusive node this round
            filtered_nodes = [exclusive_nodes[0]]
            logger.info(
                f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
            )
            return filtered_nodes

        return buildable_nodes

    def get_nodes_ready_to_build(self) -> DependencyNodeList:
        """Get the list of nodes that are ready to be built in this round."""
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Handle exclusive builds
        buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)

        return buildable_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get nodes that are ready to be built in this round
            buildable_nodes = build_manager.get_nodes_ready_to_build()

            # Build up to max_workers nodes concurrently (or all if max_workers is None)
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=max_workers
            ) as executor:
                futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                reqs: list[Requirement] = []
                logger.info(
                    "starting to build: %s", sorted(n.key for n in buildable_nodes)
                )
                for node in buildable_nodes:
                    req = Requirement(f"{node.canonicalized_name}=={node.version}")
                    reqs.append(req)
                    future = executor.submit(
                        _build_parallel,
                        wkctx=wkctx,
                        resolved_version=node.version,
                        req=req,
                        source_download_url=node.download_url,
                        force=force,
                        cache_wheel_server_url=cache_wheel_server_url,
                    )
                    future.add_done_callback(update_progressbar_cb)
                    futures.append(future)

                # Wait for all builds to complete
                for node, future in zip(buildable_nodes, futures, strict=True):
                    try:
                        entry = future.result()
                        entries.append(entry)
                        build_manager.mark_node_built(node)
                        # progress bar is updated in callback
                    except Exception as e:
                        logger.error(f"Failed to build {node.key}: {e}")
                        raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now I need to replace the _find_buildable_nodes method to use the build dependency sorter instead of the current complex logic:

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]
        
        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_build_dependency_topological_sorter()
        )
        self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_install_dependency_topological_sorter()
        )

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built using the build dependency topological sorter."""
        # Get nodes that are ready to build from the build dependency sorter
        ready_nodes = list(self.build_sorter.get_ready())
        
        # Filter to only include nodes that are in our nodes_to_build list and not already built
        buildable_nodes: DependencyNodeList = [
            node for node in ready_nodes 
            if node in nodes_to_build and not self.is_node_built(node)
        ]
        
        if buildable_nodes:
            logger.info(
                "ready to build from build dependency sorter: %s",
                sorted(n.key for n in buildable_nodes),
            )
        
        return buildable_nodes

    def _filter_for_exclusive_builds(
        self, buildable_nodes: DependencyNodeList
    ) -> DependencyNodeList:
        """Filter buildable nodes to handle exclusive build requirements."""
        # Check if any buildable node requires exclusive build (exclusive_build == True)
        exclusive_nodes: DependencyNodeList = [
            node
            for node in buildable_nodes
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build
        ]
        if exclusive_nodes:
            # Only build the first exclusive node this round
            filtered_nodes = [exclusive_nodes[0]]
            logger.info(
                f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
            )
            return filtered_nodes

        return buildable_nodes

    def get_nodes_ready_to_build(self) -> DependencyNodeList:
        """Get the list of nodes that are ready to be built in this round."""
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Handle exclusive builds
        buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)

        return buildable_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get nodes that are ready to be built in this round
            buildable_nodes = build_manager.get_nodes_ready_to_build()

            # Build up to max_workers nodes concurrently (or all if max_workers is None)
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=max_workers
            ) as executor:
                futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                reqs: list[Requirement] = []
                logger.info(
                    "starting to build: %s", sorted(n.key for n in buildable_nodes)
                )
                for node in buildable_nodes:
                    req = Requirement(f"{node.canonicalized_name}=={node.version}")
                    reqs.append(req)
                    future = executor.submit(
                        _build_parallel,
                        wkctx=wkctx,
                        resolved_version=node.version,
                        req=req,
                        source_download_url=node.download_url,
                        force=force,
                        cache_wheel_server_url=cache_wheel_server_url,
                    )
                    future.add_done_callback(update_progressbar_cb)
                    futures.append(future)

                # Wait for all builds to complete
                for node, future in zip(buildable_nodes, futures, strict=True):
                    try:
                        entry = future.result()
                        entries.append(entry)
                        build_manager.mark_node_built(node)
                        # progress bar is updated in callback
                    except Exception as e:
                        logger.error(f"Failed to build {node.key}: {e}")
                        raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now I need to update the mark_node_built method to handle both sorters according to the requirements: when nodes are built, they should be marked done in the install dependency graph, and then all ready nodes from the install dependency graph should be marked as done in the build dependency graph:

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]
        
        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_build_dependency_topological_sorter()
        )
        self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_install_dependency_topological_sorter()
        )

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built using the build dependency topological sorter."""
        # Get nodes that are ready to build from the build dependency sorter
        ready_nodes = list(self.build_sorter.get_ready())
        
        # Filter to only include nodes that are in our nodes_to_build list and not already built
        buildable_nodes: DependencyNodeList = [
            node for node in ready_nodes 
            if node in nodes_to_build and not self.is_node_built(node)
        ]
        
        if buildable_nodes:
            logger.info(
                "ready to build from build dependency sorter: %s",
                sorted(n.key for n in buildable_nodes),
            )
        
        return buildable_nodes

    def _filter_for_exclusive_builds(
        self, buildable_nodes: DependencyNodeList
    ) -> DependencyNodeList:
        """Filter buildable nodes to handle exclusive build requirements."""
        # Check if any buildable node requires exclusive build (exclusive_build == True)
        exclusive_nodes: DependencyNodeList = [
            node
            for node in buildable_nodes
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build
        ]
        if exclusive_nodes:
            # Only build the first exclusive node this round
            filtered_nodes = [exclusive_nodes[0]]
            logger.info(
                f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
            )
            return filtered_nodes

        return buildable_nodes

    def get_nodes_ready_to_build(self) -> DependencyNodeList:
        """Get the list of nodes that are ready to be built in this round."""
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Handle exclusive builds
        buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)

        return buildable_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)
        
        # Mark the node as done in the install dependency graph
        self.install_sorter.done(node)
        
        # Get all nodes that are now ready in the install dependency graph
        ready_install_nodes = list(self.install_sorter.get_ready())
        
        # Mark all ready install nodes as done in the build dependency graph
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)
            
        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes) if ready_install_nodes else "none"
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get nodes that are ready to be built in this round
            buildable_nodes = build_manager.get_nodes_ready_to_build()

            # Build up to max_workers nodes concurrently (or all if max_workers is None)
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=max_workers
            ) as executor:
                futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                reqs: list[Requirement] = []
                logger.info(
                    "starting to build: %s", sorted(n.key for n in buildable_nodes)
                )
                for node in buildable_nodes:
                    req = Requirement(f"{node.canonicalized_name}=={node.version}")
                    reqs.append(req)
                    future = executor.submit(
                        _build_parallel,
                        wkctx=wkctx,
                        resolved_version=node.version,
                        req=req,
                        source_download_url=node.download_url,
                        force=force,
                        cache_wheel_server_url=cache_wheel_server_url,
                    )
                    future.add_done_callback(update_progressbar_cb)
                    futures.append(future)

                # Wait for all builds to complete
                for node, future in zip(buildable_nodes, futures, strict=True):
                    try:
                        entry = future.result()
                        entries.append(entry)
                        build_manager.mark_node_built(node)
                        # progress bar is updated in callback
                    except Exception as e:
                        logger.error(f"Failed to build {node.key}: {e}")
                        raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Let me check for any linting errors in the updated file:

I need to fix the whitespace issues I introduced:

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_build_dependency_topological_sorter()
        )
        self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_install_dependency_topological_sorter()
        )

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built using the build dependency topological sorter."""
        # Get nodes that are ready to build from the build dependency sorter
        ready_nodes = list(self.build_sorter.get_ready())
        
        # Filter to only include nodes that are in our nodes_to_build list and not already built
        buildable_nodes: DependencyNodeList = [
            node for node in ready_nodes 
            if node in nodes_to_build and not self.is_node_built(node)
        ]
        
        if buildable_nodes:
            logger.info(
                "ready to build from build dependency sorter: %s",
                sorted(n.key for n in buildable_nodes),
            )
        
        return buildable_nodes

    def _filter_for_exclusive_builds(
        self, buildable_nodes: DependencyNodeList
    ) -> DependencyNodeList:
        """Filter buildable nodes to handle exclusive build requirements."""
        # Check if any buildable node requires exclusive build (exclusive_build == True)
        exclusive_nodes: DependencyNodeList = [
            node
            for node in buildable_nodes
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build
        ]
        if exclusive_nodes:
            # Only build the first exclusive node this round
            filtered_nodes = [exclusive_nodes[0]]
            logger.info(
                f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
            )
            return filtered_nodes

        return buildable_nodes

    def get_nodes_ready_to_build(self) -> DependencyNodeList:
        """Get the list of nodes that are ready to be built in this round."""
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Handle exclusive builds
        buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)

        return buildable_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)
        
        # Mark the node as done in the install dependency graph
        self.install_sorter.done(node)
        
        # Get all nodes that are now ready in the install dependency graph
        ready_install_nodes = list(self.install_sorter.get_ready())
        
        # Mark all ready install nodes as done in the build dependency graph
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)
            
        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes) if ready_install_nodes else "none"
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get nodes that are ready to be built in this round
            buildable_nodes = build_manager.get_nodes_ready_to_build()

            # Build up to max_workers nodes concurrently (or all if max_workers is None)
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=max_workers
            ) as executor:
                futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                reqs: list[Requirement] = []
                logger.info(
                    "starting to build: %s", sorted(n.key for n in buildable_nodes)
                )
                for node in buildable_nodes:
                    req = Requirement(f"{node.canonicalized_name}=={node.version}")
                    reqs.append(req)
                    future = executor.submit(
                        _build_parallel,
                        wkctx=wkctx,
                        resolved_version=node.version,
                        req=req,
                        source_download_url=node.download_url,
                        force=force,
                        cache_wheel_server_url=cache_wheel_server_url,
                    )
                    future.add_done_callback(update_progressbar_cb)
                    futures.append(future)

                # Wait for all builds to complete
                for node, future in zip(buildable_nodes, futures, strict=True):
                    try:
                        entry = future.result()
                        entries.append(entry)
                        build_manager.mark_node_built(node)
                        # progress bar is updated in callback
                    except Exception as e:
                        logger.error(f"Failed to build {node.key}: {e}")
                        raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_build_dependency_topological_sorter()
        )
        self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_install_dependency_topological_sorter()
        )

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built using the build dependency topological sorter."""
        # Get nodes that are ready to build from the build dependency sorter
        ready_nodes = list(self.build_sorter.get_ready())

        # Filter to only include nodes that are in our nodes_to_build list and not already built
        buildable_nodes: DependencyNodeList = [
            node for node in ready_nodes
            if node in nodes_to_build and not self.is_node_built(node)
        ]

        if buildable_nodes:
            logger.info(
                "ready to build from build dependency sorter: %s",
                sorted(n.key for n in buildable_nodes),
            )

        return buildable_nodes

    def _filter_for_exclusive_builds(
        self, buildable_nodes: DependencyNodeList
    ) -> DependencyNodeList:
        """Filter buildable nodes to handle exclusive build requirements."""
        # Check if any buildable node requires exclusive build (exclusive_build == True)
        exclusive_nodes: DependencyNodeList = [
            node
            for node in buildable_nodes
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build
        ]
        if exclusive_nodes:
            # Only build the first exclusive node this round
            filtered_nodes = [exclusive_nodes[0]]
            logger.info(
                f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
            )
            return filtered_nodes

        return buildable_nodes

    def get_nodes_ready_to_build(self) -> DependencyNodeList:
        """Get the list of nodes that are ready to be built in this round."""
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Handle exclusive builds
        buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)

        return buildable_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)
        
        # Mark the node as done in the install dependency graph
        self.install_sorter.done(node)
        
        # Get all nodes that are now ready in the install dependency graph
        ready_install_nodes = list(self.install_sorter.get_ready())
        
        # Mark all ready install nodes as done in the build dependency graph
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)
            
        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes) if ready_install_nodes else "none"
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get nodes that are ready to be built in this round
            buildable_nodes = build_manager.get_nodes_ready_to_build()

            # Build up to max_workers nodes concurrently (or all if max_workers is None)
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=max_workers
            ) as executor:
                futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                reqs: list[Requirement] = []
                logger.info(
                    "starting to build: %s", sorted(n.key for n in buildable_nodes)
                )
                for node in buildable_nodes:
                    req = Requirement(f"{node.canonicalized_name}=={node.version}")
                    reqs.append(req)
                    future = executor.submit(
                        _build_parallel,
                        wkctx=wkctx,
                        resolved_version=node.version,
                        req=req,
                        source_download_url=node.download_url,
                        force=force,
                        cache_wheel_server_url=cache_wheel_server_url,
                    )
                    future.add_done_callback(update_progressbar_cb)
                    futures.append(future)

                # Wait for all builds to complete
                for node, future in zip(buildable_nodes, futures, strict=True):
                    try:
                        entry = future.result()
                        entries.append(entry)
                        build_manager.mark_node_built(node)
                        # progress bar is updated in callback
                    except Exception as e:
                        logger.error(f"Failed to build {node.key}: {e}")
                        raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_build_dependency_topological_sorter()
        )
        self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
            graph.get_install_dependency_topological_sorter()
        )

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built using the build dependency topological sorter."""
        # Get nodes that are ready to build from the build dependency sorter
        ready_nodes = list(self.build_sorter.get_ready())

        # Filter to only include nodes that are in our nodes_to_build list and not already built
        buildable_nodes: DependencyNodeList = [
            node for node in ready_nodes
            if node in nodes_to_build and not self.is_node_built(node)
        ]

        if buildable_nodes:
            logger.info(
                "ready to build from build dependency sorter: %s",
                sorted(n.key for n in buildable_nodes),
            )

        return buildable_nodes

    def _filter_for_exclusive_builds(
        self, buildable_nodes: DependencyNodeList
    ) -> DependencyNodeList:
        """Filter buildable nodes to handle exclusive build requirements."""
        # Check if any buildable node requires exclusive build (exclusive_build == True)
        exclusive_nodes: DependencyNodeList = [
            node
            for node in buildable_nodes
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build
        ]
        if exclusive_nodes:
            # Only build the first exclusive node this round
            filtered_nodes = [exclusive_nodes[0]]
            logger.info(
                f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
            )
            return filtered_nodes

        return buildable_nodes

    def get_nodes_ready_to_build(self) -> DependencyNodeList:
        """Get the list of nodes that are ready to be built in this round."""
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Handle exclusive builds
        buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)

        return buildable_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the install dependency graph
        self.install_sorter.done(node)

        # Get all nodes that are now ready in the install dependency graph
        ready_install_nodes = list(self.install_sorter.get_ready())

        # Mark all ready install nodes as done in the build dependency graph
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)

        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes) if ready_install_nodes else "none"
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get nodes that are ready to be built in this round
            buildable_nodes = build_manager.get_nodes_ready_to_build()

            # Build up to max_workers nodes concurrently (or all if max_workers is None)
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=max_workers
            ) as executor:
                futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                reqs: list[Requirement] = []
                logger.info(
                    "starting to build: %s", sorted(n.key for n in buildable_nodes)
                )
                for node in buildable_nodes:
                    req = Requirement(f"{node.canonicalized_name}=={node.version}")
                    reqs.append(req)
                    future = executor.submit(
                        _build_parallel,
                        wkctx=wkctx,
                        resolved_version=node.version,
                        req=req,
                        source_download_url=node.download_url,
                        force=force,
                        cache_wheel_server_url=cache_wheel_server_url,
                    )
                    future.add_done_callback(update_progressbar_cb)
                    futures.append(future)

                # Wait for all builds to complete
                for node, future in zip(buildable_nodes, futures, strict=True):
                    try:
                        entry = future.result()
                        entries.append(entry)
                        build_manager.mark_node_built(node)
                        # progress bar is updated in callback
                    except Exception as e:
                        logger.error(f"Failed to build {node.key}: {e}")
                        raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Perfect! I've successfully updated the ParallelBuildManager to use two topological sorters for managing the build order. Here's a summary of the changes:

Key Changes Made

1. Added Topological Sorters to Constructor

Added build_sorter using graph.get_build_dependency_topological_sorter()
Added install_sorter using graph.get_install_dependency_topological_sorter()
Both sorters are prepared and ready for use

2. Simplified Build Logic

Replaced complex _find_buildable_nodes logic with simple call to build_sorter.get_ready()
Nodes become ready to build when the build dependency graph says they are
Much cleaner and more efficient than the previous manual dependency checking

3. Updated Node Completion Logic

When a node is built, it's marked as done() in the install dependency graph
All nodes that become ready in the install dependency graph are then marked as done() in the build dependency graph
This implements the exact flow you specified

How It Works

Build Readiness: Nodes are ready to build when build_sorter.get_ready() returns them
Build Completion: When mark_node_built() is called:
- Node is marked done in the install sorter
- Any install dependencies that become ready are marked done in the build sorter
- This cascades the completion through both dependency graphs

Benefits

Correctness: Uses proper topological sorting to ensure correct build order
Efficiency: No more manual dependency traversal and checking
Maintainability: Much simpler logic that's easier to understand and debug
Scalability: Handles complex dependency graphs efficiently

The implementation now properly manages both build-time and install-time dependencies using the topological sorters, ensuring that packages are built in the correct order while respecting both types of dependencies.

User

Change get_nodes_ready_to_build to be a generator returning DependencyNodeLists. Organize the buildable_nodes into separate lists based on whether the packages are exclusive or not. Each exclusive build node should be in its own list. All non-exclusive packages should be in 1 list together.

Cursor

I need to update the get_nodes_ready_to_build method to be a generator that yields separate lists based on exclusive build requirements. Let me examine the current method and then refactor it:

Now let me create a todo list and update the method:

Now I'll refactor the method to be a generator that organizes nodes by exclusivity:

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()
        self.install_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_install_dependency_topological_sorter()

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built using the build dependency topological sorter."""
        # Get nodes that are ready to build from the build dependency sorter
        ready_nodes = list(self.build_sorter.get_ready())

        # Filter to only include nodes that are in our nodes_to_build list and not already built
        buildable_nodes: DependencyNodeList = [
            node
            for node in ready_nodes
            if node in nodes_to_build and not self.is_node_built(node)
        ]

        if buildable_nodes:
            logger.info(
                "ready to build from build dependency sorter: %s",
                sorted(n.key for n in buildable_nodes),
            )

        return buildable_nodes

    def _filter_for_exclusive_builds(
        self, buildable_nodes: DependencyNodeList
    ) -> DependencyNodeList:
        """Filter buildable nodes to handle exclusive build requirements."""
        # Check if any buildable node requires exclusive build (exclusive_build == True)
        exclusive_nodes: DependencyNodeList = [
            node
            for node in buildable_nodes
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build
        ]
        if exclusive_nodes:
            # Only build the first exclusive node this round
            filtered_nodes = [exclusive_nodes[0]]
            logger.info(
                f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
            )
            return filtered_nodes

        return buildable_nodes

    def get_nodes_ready_to_build(self) -> typing.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.
        
        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Separate exclusive and non-exclusive nodes
        exclusive_nodes: DependencyNodeList = []
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                exclusive_nodes.append(node)
            else:
                non_exclusive_nodes.append(node)

        # Yield each exclusive node in its own list
        for exclusive_node in exclusive_nodes:
            logger.info(
                f"{exclusive_node.canonicalized_name}: requires exclusive build, yielding it alone"
            )
            yield [exclusive_node]

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "yielding non-exclusive nodes together: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the install dependency graph
        self.install_sorter.done(node)

        # Get all nodes that are now ready in the install dependency graph
        ready_install_nodes = list(self.install_sorter.get_ready())

        # Mark all ready install nodes as done in the build dependency graph
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)

        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes)
            if ready_install_nodes
            else "none",
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get nodes that are ready to be built in this round
            buildable_nodes = build_manager.get_nodes_ready_to_build()

            # Build up to max_workers nodes concurrently (or all if max_workers is None)
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=max_workers
            ) as executor:
                futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                reqs: list[Requirement] = []
                logger.info(
                    "starting to build: %s", sorted(n.key for n in buildable_nodes)
                )
                for node in buildable_nodes:
                    req = Requirement(f"{node.canonicalized_name}=={node.version}")
                    reqs.append(req)
                    future = executor.submit(
                        _build_parallel,
                        wkctx=wkctx,
                        resolved_version=node.version,
                        req=req,
                        source_download_url=node.download_url,
                        force=force,
                        cache_wheel_server_url=cache_wheel_server_url,
                    )
                    future.add_done_callback(update_progressbar_cb)
                    futures.append(future)

                # Wait for all builds to complete
                for node, future in zip(buildable_nodes, futures, strict=True):
                    try:
                        entry = future.result()
                        entries.append(entry)
                        build_manager.mark_node_built(node)
                        # progress bar is updated in callback
                    except Exception as e:
                        logger.error(f"Failed to build {node.key}: {e}")
                        raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now I need to remove the old _filter_for_exclusive_builds method since it's no longer needed, and update the calling code to handle the generator:

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()
        self.install_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_install_dependency_topological_sorter()

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built using the build dependency topological sorter."""
        # Get nodes that are ready to build from the build dependency sorter
        ready_nodes = list(self.build_sorter.get_ready())

        # Filter to only include nodes that are in our nodes_to_build list and not already built
        buildable_nodes: DependencyNodeList = [
            node
            for node in ready_nodes
            if node in nodes_to_build and not self.is_node_built(node)
        ]

        if buildable_nodes:
            logger.info(
                "ready to build from build dependency sorter: %s",
                sorted(n.key for n in buildable_nodes),
            )

        return buildable_nodes


    def get_nodes_ready_to_build(self) -> typing.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.
        
        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Separate exclusive and non-exclusive nodes
        exclusive_nodes: DependencyNodeList = []
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                exclusive_nodes.append(node)
            else:
                non_exclusive_nodes.append(node)

        # Yield each exclusive node in its own list
        for exclusive_node in exclusive_nodes:
            logger.info(
                f"{exclusive_node.canonicalized_name}: requires exclusive build, yielding it alone"
            )
            yield [exclusive_node]

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "yielding non-exclusive nodes together: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the install dependency graph
        self.install_sorter.done(node)

        # Get all nodes that are now ready in the install dependency graph
        ready_install_nodes = list(self.install_sorter.get_ready())

        # Mark all ready install nodes as done in the build dependency graph
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)

        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes)
            if ready_install_nodes
            else "none",
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get nodes that are ready to be built in this round
            buildable_nodes = build_manager.get_nodes_ready_to_build()

            # Build up to max_workers nodes concurrently (or all if max_workers is None)
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=max_workers
            ) as executor:
                futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                reqs: list[Requirement] = []
                logger.info(
                    "starting to build: %s", sorted(n.key for n in buildable_nodes)
                )
                for node in buildable_nodes:
                    req = Requirement(f"{node.canonicalized_name}=={node.version}")
                    reqs.append(req)
                    future = executor.submit(
                        _build_parallel,
                        wkctx=wkctx,
                        resolved_version=node.version,
                        req=req,
                        source_download_url=node.download_url,
                        force=force,
                        cache_wheel_server_url=cache_wheel_server_url,
                    )
                    future.add_done_callback(update_progressbar_cb)
                    futures.append(future)

                # Wait for all builds to complete
                for node, future in zip(buildable_nodes, futures, strict=True):
                    try:
                        entry = future.result()
                        entries.append(entry)
                        build_manager.mark_node_built(node)
                        # progress bar is updated in callback
                    except Exception as e:
                        logger.error(f"Failed to build {node.key}: {e}")
                        raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now I need to find and update the calling code that uses get_nodes_ready_to_build. Let me look at the build_parallel function:

Now I need to update the calling code to handle the generator. The logic should iterate over each batch of nodes yielded by the generator:

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()
        self.install_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_install_dependency_topological_sorter()

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built using the build dependency topological sorter."""
        # Get nodes that are ready to build from the build dependency sorter
        ready_nodes = list(self.build_sorter.get_ready())

        # Filter to only include nodes that are in our nodes_to_build list and not already built
        buildable_nodes: DependencyNodeList = [
            node
            for node in ready_nodes
            if node in nodes_to_build and not self.is_node_built(node)
        ]

        if buildable_nodes:
            logger.info(
                "ready to build from build dependency sorter: %s",
                sorted(n.key for n in buildable_nodes),
            )

        return buildable_nodes


    def get_nodes_ready_to_build(self) -> typing.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.
        
        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Separate exclusive and non-exclusive nodes
        exclusive_nodes: DependencyNodeList = []
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                exclusive_nodes.append(node)
            else:
                non_exclusive_nodes.append(node)

        # Yield each exclusive node in its own list
        for exclusive_node in exclusive_nodes:
            logger.info(
                f"{exclusive_node.canonicalized_name}: requires exclusive build, yielding it alone"
            )
            yield [exclusive_node]

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "yielding non-exclusive nodes together: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the install dependency graph
        self.install_sorter.done(node)

        # Get all nodes that are now ready in the install dependency graph
        ready_install_nodes = list(self.install_sorter.get_ready())

        # Mark all ready install nodes as done in the build dependency graph
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)

        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes)
            if ready_install_nodes
            else "none",
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s", sorted(n.key for n in buildable_nodes)
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Let me check for any linting errors:

I need to fix the whitespace issue and update the import for the Generator type:

import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()
        self.install_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_install_dependency_topological_sorter()

    def _find_buildable_nodes(
        self, nodes_to_build: DependencyNodeList
    ) -> DependencyNodeList:
        """Find nodes that can be built using the build dependency topological sorter."""
        # Get nodes that are ready to build from the build dependency sorter
        ready_nodes = list(self.build_sorter.get_ready())

        # Filter to only include nodes that are in our nodes_to_build list and not already built
        buildable_nodes: DependencyNodeList = [
            node
            for node in ready_nodes
            if node in nodes_to_build and not self.is_node_built(node)
        ]

        if buildable_nodes:
            logger.info(
                "ready to build from build dependency sorter: %s",
                sorted(n.key for n in buildable_nodes),
            )

        return buildable_nodes


    def get_nodes_ready_to_build(self) -> typing.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Separate exclusive and non-exclusive nodes
        exclusive_nodes: DependencyNodeList = []
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                exclusive_nodes.append(node)
            else:
                non_exclusive_nodes.append(node)

        # Yield each exclusive node in its own list
        for exclusive_node in exclusive_nodes:
            logger.info(
                f"{exclusive_node.canonicalized_name}: requires exclusive build, yielding it alone"
            )
            yield [exclusive_node]

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "yielding non-exclusive nodes together: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the install dependency graph
        self.install_sorter.done(node)

        # Get all nodes that are now ready in the install dependency graph
        ready_install_nodes = list(self.install_sorter.get_ready())

        # Mark all ready install nodes as done in the build dependency graph
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)

        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes)
            if ready_install_nodes
            else "none",
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s", sorted(n.key for n in buildable_nodes)
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Perfect! I've successfully refactored the get_nodes_ready_to_build method to be a generator that organizes buildable nodes by exclusivity. Here's a summary of the changes:

Key Changes Made

1. Changed Method to Generator

Updated return type to typing.Generator[DependencyNodeList, None, None]
Method now yields separate lists instead of returning a single list

2. Organized Nodes by Exclusivity

Exclusive nodes: Each exclusive build node is yielded in its own separate list
Non-exclusive nodes: All non-exclusive nodes are yielded together in a single list
This ensures exclusive builds run alone while allowing parallel builds for non-exclusive packages

3. Removed Old Logic

Eliminated the _filter_for_exclusive_builds method since the logic is now integrated into the generator
Cleaner separation of concerns

4. Updated Calling Code

Modified the build loop to iterate over each batch yielded by the generator
Each batch is processed in its own ThreadPoolExecutor context
Maintains the same parallel building behavior but respects exclusivity requirements

How It Works

Generator yields batches: Each call to get_nodes_ready_to_build() yields multiple batches
Exclusive builds: Each exclusive package gets its own batch (list with one item)
Non-exclusive builds: All non-exclusive packages are in one batch together
Sequential batch processing: Batches are processed sequentially, but within each batch, builds can run in parallel
Proper isolation: Exclusive builds are guaranteed to run alone

Benefits

Flexible parallelism: Non-exclusive packages can build in parallel while exclusive packages build alone
Better resource management: Each batch gets its own ThreadPoolExecutor
Cleaner code: Logic is more organized and easier to understand
Maintains correctness: Respects both dependency ordering and exclusivity requirements

The implementation now properly handles the complex requirements of managing both build dependencies through topological sorting and exclusive build constraints through batch organization.

User

The _find_buildable_nodes method is redundant. Move the logic inline in get_nodes_ready_to_build(). Remove the extra filter step, that is redundant.

Cursor

You're absolutely right! The _find_buildable_nodes method is redundant since we're already using the topological sorter. Let me inline the logic and remove the redundant filtering:

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        buildable_nodes = list(self.build_sorter.get_ready())

Let me read the current state of the file to see the exact content:

Now I'll remove the _find_buildable_nodes method and inline the logic, removing the redundant filtering:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()
        self.install_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_install_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        buildable_nodes = list(self.build_sorter.get_ready())

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Separate exclusive and non-exclusive nodes
        exclusive_nodes: DependencyNodeList = []
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                exclusive_nodes.append(node)
            else:
                non_exclusive_nodes.append(node)

        # Yield each exclusive node in its own list
        for exclusive_node in exclusive_nodes:
            logger.info(
                f"{exclusive_node.canonicalized_name}: requires exclusive build, yielding it alone"
            )
            yield [exclusive_node]

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "yielding non-exclusive nodes together: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the install dependency graph
        self.install_sorter.done(node)

        # Get all nodes that are now ready in the install dependency graph
        ready_install_nodes = list(self.install_sorter.get_ready())

        # Mark all ready install nodes as done in the build dependency graph
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)

        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes)
            if ready_install_nodes
            else "none",
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Perfect! I've successfully removed the redundant _find_buildable_nodes method and inlined the logic. The key improvements are:

Changes Made

1. Removed Redundant Method

Eliminated _find_buildable_nodes method entirely
Moved the core logic directly into get_nodes_ready_to_build

2. Removed Redundant Filtering

No longer filtering by nodes_to_build (which was just self._remaining_nodes)
No longer filtering by self.is_node_built(node) since the topological sorter already handles this correctly
The topological sorter's get_ready() method already returns only nodes that are ready to build and haven't been marked as done

3. Simplified Logic

Direct call to self.build_sorter.get_ready()
Much cleaner and more efficient

Why This Works Better

TopologicalSorter handles state: The sorter already knows which nodes are done (via done() calls) and which are ready
No double filtering: We were redundantly checking if nodes were built when the sorter already manages this state
Cleaner code: Single responsibility - the sorter manages readiness, the generator manages batching by exclusivity
Better performance: Eliminates unnecessary list comprehensions and filtering

The implementation is now much more streamlined and relies properly on the topological sorter to manage the build state, while the generator focuses solely on organizing nodes by exclusivity requirements.

User

Instead of iterating over the buildable_nodes and then again over the exclusive nodes, yield the list with each exclusive node as it is encountered.

Cursor

You're right! Instead of collecting all nodes first and then iterating again, I should yield each exclusive node immediately as it's encountered. Let me refactor the logic:

Now I'll refactor to yield exclusive nodes immediately as they're encountered:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()
        self.install_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_install_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        buildable_nodes = list(self.build_sorter.get_ready())

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(
                    f"{node.canonicalized_name}: requires exclusive build, yielding it alone"
                )
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "yielding non-exclusive nodes together: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the install dependency graph
        self.install_sorter.done(node)

        # Get all nodes that are now ready in the install dependency graph
        ready_install_nodes = list(self.install_sorter.get_ready())

        # Mark all ready install nodes as done in the build dependency graph
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)

        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes)
            if ready_install_nodes
            else "none",
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Perfect! I've refactored the logic to be much more efficient. Here are the key improvements:

Changes Made

1. Single Pass Processing

Now iterates through buildable_nodes only once
Yields exclusive nodes immediately as they're encountered
Only collects non-exclusive nodes for later yielding

2. Eliminated Redundant Collections

Removed the exclusive_nodes list entirely
No longer need to iterate over exclusive nodes separately
Only maintains non_exclusive_nodes list

3. Immediate Yielding

Exclusive nodes are yielded as soon as they're identified
No need to collect them first and then iterate again
More memory efficient and responsive

Benefits

Better Performance: Single iteration instead of double iteration
Lower Memory Usage: Don't need to store exclusive nodes in a separate list
More Responsive: Exclusive builds can start immediately when encountered
Cleaner Code: Simpler logic flow with fewer intermediate collections

The refactored method now processes nodes in a single pass, yielding exclusive nodes immediately and collecting non-exclusive nodes for a final batch yield. This is much more efficient than the previous approach of collecting everything first and then iterating again.

User

Run the linter and mypy

Cursor

I'll run the linter and mypy to check for any issues in the code.

Excellent! Both the linter and mypy checks passed successfully:

Linter Results ✅

Ruff check: All checks passed!
Ruff format: 80 files already formatted
Mergify lint: All expected jobs found and configured correctly

MyPy Results ✅

Source files: Success: no issues found in 49 source files
Test files: Success: no issues found in 31 source files

Summary

The code changes we made are clean and properly typed:

✅ Added topological sorters to DependencyGraph - properly typed with graphlib.TopologicalSorter[DependencyNode]
✅ Updated ParallelBuildManager - uses the new sorters correctly
✅ Refactored build logic - simplified and more efficient
✅ Generator implementation - properly typed with collections.abc.Generator
✅ Code style - follows project formatting standards
✅ Type annotations - all types are correctly specified

All the refactoring work has been completed successfully with no linting or type checking issues. The code is ready for use and maintains high code quality standards.

User

Run the unit tests

Cursor

I'll run the unit tests to make sure all our changes work correctly.

The unit tests are failing because the tests were written for the old API where get_nodes_ready_to_build() returned a list, but now it returns a generator. I need to update the tests to handle the new generator-based API. Let me examine the test file and fix the issues:

The main issues are:

Generator vs List: Tests expect a list but now get a generator
TopologicalSorter.done() errors: Can't call done() on nodes that weren't returned by get_ready()

Let me examine the test file to understand the expected behavior:

I see the issue. The tests expect get_nodes_ready_to_build() to return a single list of nodes, but now it returns a generator that yields multiple batches. I need to update the tests to handle this new behavior. Let me also check if there are issues with the TopologicalSorter usage:

The core issue is that the tests need to be updated to work with the new generator API. The tests are trying to:

Get a list of buildable nodes (now it's a generator of batches)
Call mark_node_built() on nodes that weren't returned by get_ready() (TopologicalSorter constraint)

I need to create a helper function to collect all nodes from the generator and fix the TopologicalSorter usage. Let me update the tests:

from unittest.mock import Mock, patch

import pytest
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from packaging.version import Version

from fromager import context, dependency_graph, requirements_file
from fromager.commands.build import ParallelBuildManager


def create_test_graph():
    """Create a test dependency graph with various dependency scenarios.

    Graph structure:
    - app_main: Top-level app (depends on lib_shared for install, tool_build for build)
    - lib_shared: Shared library (depends on util_base for install, no build deps)
    - util_base: Base utility (no dependencies - leaf node)
    - tool_build: Build tool (depends on util_base for build and install)
    - plugin_extra: Plugin (depends on lib_shared for install, tool_build for build)
    - helper_internal: Internal helper (only used by tool_build, not top-level)

    Expected build order: util_base → helper_internal → tool_build → lib_shared → (app_main, plugin_extra)
    """
    graph = dependency_graph.DependencyGraph()

    # Add top-level dependencies (directly requested by user)
    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("app_main==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/app_main-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("plugin_extra==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/plugin_extra-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("app_main"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_SYSTEM,
        req=Requirement("tool_build==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/tool_build-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("app_main"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("lib_shared"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("util_base==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/util_base-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("plugin_extra"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_BACKEND,
        req=Requirement("tool_build==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/tool_build-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("plugin_extra"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    # tool_build build dependencies (not top-level, only used by others)
    graph.add_dependency(
        parent_name=canonicalize_name("tool_build"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_SYSTEM,
        req=Requirement("util_base==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/util_base-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("tool_build"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("helper_internal==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/helper_internal-1.0.tar.gz",
    )

    # util_base has no dependencies (leaf node)
    # helper_internal has no dependencies (leaf node, not top-level)

    return graph


@pytest.fixture
def mock_context():
    """Create a mock WorkContext for testing."""
    ctx = Mock(spec=context.WorkContext)
    ctx.settings = Mock()

    def mock_package_build_info(name):
        mock_pbi = Mock()
        mock_pbi.exclusive_build = False
        return mock_pbi

    ctx.settings.package_build_info = mock_package_build_info
    return ctx


@pytest.fixture
def test_graph():
    """Provide the test dependency graph."""
    return create_test_graph()


@pytest.fixture
def build_manager(mock_context, test_graph):
    """Create a ParallelBuildManager instance for testing."""
    return ParallelBuildManager(mock_context, test_graph)


class TestParallelBuildManager:
    """Test suite for ParallelBuildManager class."""

    def test_initialization(self, build_manager, test_graph):
        """Test that the manager initializes correctly."""
        assert build_manager.wkctx is not None
        assert build_manager.graph == test_graph
        assert len(build_manager.built_node_keys) == 0
        assert len(build_manager._remaining_nodes) == 6  # 6 non-root nodes
        assert build_manager.have_remaining_nodes()

    def test_have_remaining_nodes_initially_true(self, build_manager):
        """Test that initially there are remaining nodes."""
        assert build_manager.have_remaining_nodes()

    def test_have_remaining_nodes_false_when_all_built(self, build_manager, test_graph):
        """Test that have_remaining_nodes returns False when all nodes are built."""
        # Mark all nodes as built
        for node in test_graph.nodes.values():
            if node.key != dependency_graph.ROOT:
                build_manager.mark_node_built(node)

        assert not build_manager.have_remaining_nodes()

    def test_mark_node_built(self, build_manager, test_graph):
        """Test marking nodes as built."""
        util_base = test_graph.nodes["util-base==1.0"]

        assert not build_manager.is_node_built(util_base)
        assert util_base.key not in build_manager.built_node_keys
        assert util_base in build_manager._remaining_nodes

        build_manager.mark_node_built(util_base)

        # Verify node is now marked as built
        assert build_manager.is_node_built(util_base)
        assert util_base.key in build_manager.built_node_keys
        assert util_base not in build_manager._remaining_nodes

    def test_nodes_with_no_dependencies_buildable_first(self, build_manager):
        """Test that leaf nodes (no dependencies) are buildable first."""
        # Collect all nodes from all batches
        all_buildable_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_buildable_nodes.extend(batch)

        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert (
            len(all_buildable_nodes) == 3
        )  # util-base, helper-internal, and lib-shared (no build deps)
        assert "util-base" in buildable_names
        assert "helper-internal" in buildable_names
        assert "lib-shared" in buildable_names

    def test_node_with_build_dependencies(self, build_manager, test_graph):
        """Test that tool_build becomes buildable after its dependencies are built."""
        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}
        assert "util-base" in buildable_names
        assert "helper-internal" in buildable_names
        assert "tool-build" not in buildable_names

        util_base = test_graph.nodes["util-base==1.0"]
        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(util_base)
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}
        assert "tool-build" in buildable_names

    def test_transitive_dependencies(self, build_manager, test_graph):
        """Test that transitive dependencies are handled correctly.

        app_main depends on tool_build for build, and tool_build depends on helper_internal for install.
        So app_main should only be buildable after util_base, helper_internal, and tool_build are built.
        """
        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" not in buildable_names
        assert (
            "helper-internal" in buildable_names
        )  # helper_internal should still be buildable

        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "tool-build" in buildable_names
        assert "app-main" not in buildable_names

        tool_build = test_graph.nodes["tool-build==1.0"]
        build_manager.mark_node_built(tool_build)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "lib-shared" in buildable_names

    def test_multiple_build_dependencies(self, build_manager, test_graph):
        """Test that app_main waits for all its dependencies."""
        # app_main depends on tool_build for build and lib_shared for install
        # tool_build depends on util_base and helper_internal
        # lib_shared depends on util_base

        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" not in buildable_names

        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}
        assert "tool-build" in buildable_names

        tool_build = test_graph.nodes["tool-build==1.0"]
        build_manager.mark_node_built(tool_build)

        lib_shared = test_graph.nodes["lib-shared==1.0"]
        build_manager.mark_node_built(lib_shared)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" in buildable_names

    def test_circular_dependency_detection(self, build_manager, test_graph):
        """Test that circular dependencies are detected."""
        # Create a scenario where no nodes can be built but some remain
        # Mark all nodes as remaining but create impossible conditions

        # Remove all leaf nodes from remaining nodes but don't mark them as built
        # This simulates a circular dependency scenario where no nodes can be built
        build_manager._remaining_nodes = [
            node
            for node in build_manager._remaining_nodes
            if node.canonicalized_name
            not in ["util-base", "helper-internal", "lib-shared"]
        ]

        with pytest.raises(ValueError, match="Circular dependency detected"):
            build_manager.get_nodes_ready_to_build()

    @patch("fromager.commands.build.logger")
    def test_logging_messages(self, mock_logger, build_manager, test_graph):
        """Test that appropriate logging messages are generated."""
        build_manager.get_nodes_ready_to_build()

        mock_logger.info.assert_called()
        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        assert any("ready to build" in msg for msg in log_calls)

        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)
        mock_logger.reset_mock()

        build_manager.get_nodes_ready_to_build()

        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        assert any("waiting for" in msg for msg in log_calls)


class TestExclusiveBuildHandling:
    """Test suite specifically for exclusive build handling."""

    def create_exclusive_build_graph(self):
        """Create a graph where some nodes require exclusive builds."""
        graph = dependency_graph.DependencyGraph()

        for name in ["normal_a", "normal_b", "exclusive_c"]:
            graph.add_dependency(
                parent_name=None,
                parent_version=None,
                req_type=requirements_file.RequirementType.INSTALL,
                req=Requirement(f"{name}==1.0"),
                req_version=Version("1.0"),
                download_url=f"http://example.com/{name}-1.0.tar.gz",
            )

        return graph

    @pytest.fixture
    def exclusive_build_manager(self, mock_context):
        """Create a manager with exclusive build settings."""
        graph = self.create_exclusive_build_graph()

        def mock_package_build_info(name):
            mock_pbi = Mock()
            mock_pbi.exclusive_build = name == "exclusive-c"
            return mock_pbi

        mock_context.settings.package_build_info = mock_package_build_info

        return ParallelBuildManager(mock_context, graph)

    def test_exclusive_build_isolation(self, exclusive_build_manager):
        """Test that exclusive build nodes are not mixed with other nodes."""
        buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()

        assert len(buildable_nodes) == 1
        assert buildable_nodes[0].canonicalized_name == "exclusive-c"

    def test_normal_nodes_built_together_when_no_exclusive(
        self, exclusive_build_manager
    ):
        """Test that normal nodes can be built together when no exclusive nodes are ready."""
        exclusive_node = None
        for node in exclusive_build_manager._remaining_nodes:
            if node.canonicalized_name == "exclusive-c":
                exclusive_node = node
                break

        assert exclusive_node is not None
        exclusive_build_manager.mark_node_built(exclusive_node)

        buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert len(buildable_nodes) == 2
        assert "normal-a" in buildable_names
        assert "normal-b" in buildable_names
        assert "exclusive-c" not in buildable_names

    def test_multiple_exclusive_nodes_only_first_selected(self, mock_context):
        """Test that when multiple exclusive nodes are ready, only the first is selected."""
        graph = dependency_graph.DependencyGraph()

        for name in ["exclusive_a", "exclusive_b", "normal_c"]:
            graph.add_dependency(
                parent_name=None,
                parent_version=None,
                req_type=requirements_file.RequirementType.INSTALL,
                req=Requirement(f"{name}==1.0"),
                req_version=Version("1.0"),
                download_url=f"http://example.com/{name}-1.0.tar.gz",
            )

        def mock_package_build_info(name):
            mock_pbi = Mock()
            mock_pbi.exclusive_build = name.startswith("exclusive-")
            return mock_pbi

        mock_context.settings.package_build_info = mock_package_build_info
        manager = ParallelBuildManager(mock_context, graph)

        buildable_nodes = manager.get_nodes_ready_to_build()

        assert len(buildable_nodes) == 1
        assert buildable_nodes[0].canonicalized_name.startswith("exclusive-")

    @patch("fromager.commands.build.logger")
    def test_exclusive_build_logging(self, mock_logger, exclusive_build_manager):
        """Test that exclusive build scenarios are logged appropriately."""
        exclusive_build_manager.get_nodes_ready_to_build()

        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        exclusive_log_found = any(
            "requires exclusive build" in msg for msg in log_calls
        )
        assert exclusive_log_found

Now let me continue updating the other test methods. The key insight is that I need to fix the TopologicalSorter issue - we can only call done() on nodes that were returned by get_ready(). Let me create a helper method to get ready nodes and mark them as built properly:

from unittest.mock import Mock, patch

import pytest
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from packaging.version import Version

from fromager import context, dependency_graph, requirements_file
from fromager.commands.build import ParallelBuildManager


def create_test_graph():
    """Create a test dependency graph with various dependency scenarios.

    Graph structure:
    - app_main: Top-level app (depends on lib_shared for install, tool_build for build)
    - lib_shared: Shared library (depends on util_base for install, no build deps)
    - util_base: Base utility (no dependencies - leaf node)
    - tool_build: Build tool (depends on util_base for build and install)
    - plugin_extra: Plugin (depends on lib_shared for install, tool_build for build)
    - helper_internal: Internal helper (only used by tool_build, not top-level)

    Expected build order: util_base → helper_internal → tool_build → lib_shared → (app_main, plugin_extra)
    """
    graph = dependency_graph.DependencyGraph()

    # Add top-level dependencies (directly requested by user)
    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("app_main==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/app_main-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("plugin_extra==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/plugin_extra-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("app_main"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_SYSTEM,
        req=Requirement("tool_build==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/tool_build-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("app_main"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("lib_shared"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("util_base==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/util_base-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("plugin_extra"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_BACKEND,
        req=Requirement("tool_build==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/tool_build-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("plugin_extra"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    # tool_build build dependencies (not top-level, only used by others)
    graph.add_dependency(
        parent_name=canonicalize_name("tool_build"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_SYSTEM,
        req=Requirement("util_base==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/util_base-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("tool_build"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("helper_internal==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/helper_internal-1.0.tar.gz",
    )

    # util_base has no dependencies (leaf node)
    # helper_internal has no dependencies (leaf node, not top-level)

    return graph


@pytest.fixture
def mock_context():
    """Create a mock WorkContext for testing."""
    ctx = Mock(spec=context.WorkContext)
    ctx.settings = Mock()

    def mock_package_build_info(name):
        mock_pbi = Mock()
        mock_pbi.exclusive_build = False
        return mock_pbi

    ctx.settings.package_build_info = mock_package_build_info
    return ctx


@pytest.fixture
def test_graph():
    """Provide the test dependency graph."""
    return create_test_graph()


@pytest.fixture
def build_manager(mock_context, test_graph):
    """Create a ParallelBuildManager instance for testing."""
    return ParallelBuildManager(mock_context, test_graph)


class TestParallelBuildManager:
    """Test suite for ParallelBuildManager class."""

    def test_initialization(self, build_manager, test_graph):
        """Test that the manager initializes correctly."""
        assert build_manager.wkctx is not None
        assert build_manager.graph == test_graph
        assert len(build_manager.built_node_keys) == 0
        assert len(build_manager._remaining_nodes) == 6  # 6 non-root nodes
        assert build_manager.have_remaining_nodes()

    def test_have_remaining_nodes_initially_true(self, build_manager):
        """Test that initially there are remaining nodes."""
        assert build_manager.have_remaining_nodes()

    def test_have_remaining_nodes_false_when_all_built(self, build_manager, test_graph):
        """Test that have_remaining_nodes returns False when all nodes are built."""
        # Mark all nodes as built
        for node in test_graph.nodes.values():
            if node.key != dependency_graph.ROOT:
                build_manager.mark_node_built(node)

        assert not build_manager.have_remaining_nodes()

    def test_mark_node_built(self, build_manager, test_graph):
        """Test marking nodes as built."""
        util_base = test_graph.nodes["util-base==1.0"]

        assert not build_manager.is_node_built(util_base)
        assert util_base.key not in build_manager.built_node_keys
        assert util_base in build_manager._remaining_nodes

        build_manager.mark_node_built(util_base)

        # Verify node is now marked as built
        assert build_manager.is_node_built(util_base)
        assert util_base.key in build_manager.built_node_keys
        assert util_base not in build_manager._remaining_nodes

    def test_nodes_with_no_dependencies_buildable_first(self, build_manager):
        """Test that leaf nodes (no dependencies) are buildable first."""
        # Collect all nodes from all batches
        all_buildable_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_buildable_nodes.extend(batch)

        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert (
            len(all_buildable_nodes) == 3
        )  # util-base, helper-internal, and lib-shared (no build deps)
        assert "util-base" in buildable_names
        assert "helper-internal" in buildable_names
        assert "lib-shared" in buildable_names

    def test_node_with_build_dependencies(self, build_manager, test_graph):
        """Test that tool_build becomes buildable after its dependencies are built."""
        # Collect all nodes from all batches
        all_buildable_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_buildable_nodes.extend(batch)
        
        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert "util-base" in buildable_names
        assert "helper-internal" in buildable_names
        assert "tool-build" not in buildable_names

        # Mark the ready nodes as built (only nodes returned by get_ready can be marked as done)
        util_base = next(node for node in all_buildable_nodes if node.canonicalized_name == "util-base")
        helper_internal = next(node for node in all_buildable_nodes if node.canonicalized_name == "helper-internal")
        build_manager.mark_node_built(util_base)
        build_manager.mark_node_built(helper_internal)

        # Get next batch of buildable nodes
        all_buildable_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_buildable_nodes.extend(batch)
        
        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert "tool-build" in buildable_names

    def test_transitive_dependencies(self, build_manager, test_graph):
        """Test that transitive dependencies are handled correctly.

        app_main depends on tool_build for build, and tool_build depends on helper_internal for install.
        So app_main should only be buildable after util_base, helper_internal, and tool_build are built.
        """
        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" not in buildable_names
        assert (
            "helper-internal" in buildable_names
        )  # helper_internal should still be buildable

        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "tool-build" in buildable_names
        assert "app-main" not in buildable_names

        tool_build = test_graph.nodes["tool-build==1.0"]
        build_manager.mark_node_built(tool_build)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "lib-shared" in buildable_names

    def test_multiple_build_dependencies(self, build_manager, test_graph):
        """Test that app_main waits for all its dependencies."""
        # app_main depends on tool_build for build and lib_shared for install
        # tool_build depends on util_base and helper_internal
        # lib_shared depends on util_base

        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" not in buildable_names

        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}
        assert "tool-build" in buildable_names

        tool_build = test_graph.nodes["tool-build==1.0"]
        build_manager.mark_node_built(tool_build)

        lib_shared = test_graph.nodes["lib-shared==1.0"]
        build_manager.mark_node_built(lib_shared)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" in buildable_names

    def test_circular_dependency_detection(self, build_manager, test_graph):
        """Test that circular dependencies are detected."""
        # Create a scenario where no nodes can be built but some remain
        # Mark all nodes as remaining but create impossible conditions

        # Remove all leaf nodes from remaining nodes but don't mark them as built
        # This simulates a circular dependency scenario where no nodes can be built
        build_manager._remaining_nodes = [
            node
            for node in build_manager._remaining_nodes
            if node.canonicalized_name
            not in ["util-base", "helper-internal", "lib-shared"]
        ]

        with pytest.raises(ValueError, match="Circular dependency detected"):
            build_manager.get_nodes_ready_to_build()

    @patch("fromager.commands.build.logger")
    def test_logging_messages(self, mock_logger, build_manager, test_graph):
        """Test that appropriate logging messages are generated."""
        build_manager.get_nodes_ready_to_build()

        mock_logger.info.assert_called()
        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        assert any("ready to build" in msg for msg in log_calls)

        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)
        mock_logger.reset_mock()

        build_manager.get_nodes_ready_to_build()

        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        assert any("waiting for" in msg for msg in log_calls)


class TestExclusiveBuildHandling:
    """Test suite specifically for exclusive build handling."""

    def create_exclusive_build_graph(self):
        """Create a graph where some nodes require exclusive builds."""
        graph = dependency_graph.DependencyGraph()

        for name in ["normal_a", "normal_b", "exclusive_c"]:
            graph.add_dependency(
                parent_name=None,
                parent_version=None,
                req_type=requirements_file.RequirementType.INSTALL,
                req=Requirement(f"{name}==1.0"),
                req_version=Version("1.0"),
                download_url=f"http://example.com/{name}-1.0.tar.gz",
            )

        return graph

    @pytest.fixture
    def exclusive_build_manager(self, mock_context):
        """Create a manager with exclusive build settings."""
        graph = self.create_exclusive_build_graph()

        def mock_package_build_info(name):
            mock_pbi = Mock()
            mock_pbi.exclusive_build = name == "exclusive-c"
            return mock_pbi

        mock_context.settings.package_build_info = mock_package_build_info

        return ParallelBuildManager(mock_context, graph)

    def test_exclusive_build_isolation(self, exclusive_build_manager):
        """Test that exclusive build nodes are not mixed with other nodes."""
        buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()

        assert len(buildable_nodes) == 1
        assert buildable_nodes[0].canonicalized_name == "exclusive-c"

    def test_normal_nodes_built_together_when_no_exclusive(
        self, exclusive_build_manager
    ):
        """Test that normal nodes can be built together when no exclusive nodes are ready."""
        exclusive_node = None
        for node in exclusive_build_manager._remaining_nodes:
            if node.canonicalized_name == "exclusive-c":
                exclusive_node = node
                break

        assert exclusive_node is not None
        exclusive_build_manager.mark_node_built(exclusive_node)

        buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert len(buildable_nodes) == 2
        assert "normal-a" in buildable_names
        assert "normal-b" in buildable_names
        assert "exclusive-c" not in buildable_names

    def test_multiple_exclusive_nodes_only_first_selected(self, mock_context):
        """Test that when multiple exclusive nodes are ready, only the first is selected."""
        graph = dependency_graph.DependencyGraph()

        for name in ["exclusive_a", "exclusive_b", "normal_c"]:
            graph.add_dependency(
                parent_name=None,
                parent_version=None,
                req_type=requirements_file.RequirementType.INSTALL,
                req=Requirement(f"{name}==1.0"),
                req_version=Version("1.0"),
                download_url=f"http://example.com/{name}-1.0.tar.gz",
            )

        def mock_package_build_info(name):
            mock_pbi = Mock()
            mock_pbi.exclusive_build = name.startswith("exclusive-")
            return mock_pbi

        mock_context.settings.package_build_info = mock_package_build_info
        manager = ParallelBuildManager(mock_context, graph)

        buildable_nodes = manager.get_nodes_ready_to_build()

        assert len(buildable_nodes) == 1
        assert buildable_nodes[0].canonicalized_name.startswith("exclusive-")

    @patch("fromager.commands.build.logger")
    def test_exclusive_build_logging(self, mock_logger, exclusive_build_manager):
        """Test that exclusive build scenarios are logged appropriately."""
        exclusive_build_manager.get_nodes_ready_to_build()

        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        exclusive_log_found = any(
            "requires exclusive build" in msg for msg in log_calls
        )
        assert exclusive_log_found

This is going to be quite a lot of test updates. Let me create a helper function to make this easier and then update all the failing tests systematically. First, let me add a helper function at the top of the test class:

Let me add a helper method to the test class:

from unittest.mock import Mock, patch

import pytest
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from packaging.version import Version

from fromager import context, dependency_graph, requirements_file
from fromager.commands.build import ParallelBuildManager


def create_test_graph():
    """Create a test dependency graph with various dependency scenarios.

    Graph structure:
    - app_main: Top-level app (depends on lib_shared for install, tool_build for build)
    - lib_shared: Shared library (depends on util_base for install, no build deps)
    - util_base: Base utility (no dependencies - leaf node)
    - tool_build: Build tool (depends on util_base for build and install)
    - plugin_extra: Plugin (depends on lib_shared for install, tool_build for build)
    - helper_internal: Internal helper (only used by tool_build, not top-level)

    Expected build order: util_base → helper_internal → tool_build → lib_shared → (app_main, plugin_extra)
    """
    graph = dependency_graph.DependencyGraph()

    # Add top-level dependencies (directly requested by user)
    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("app_main==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/app_main-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("plugin_extra==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/plugin_extra-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("app_main"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_SYSTEM,
        req=Requirement("tool_build==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/tool_build-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("app_main"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("lib_shared"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("util_base==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/util_base-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("plugin_extra"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_BACKEND,
        req=Requirement("tool_build==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/tool_build-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("plugin_extra"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    # tool_build build dependencies (not top-level, only used by others)
    graph.add_dependency(
        parent_name=canonicalize_name("tool_build"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_SYSTEM,
        req=Requirement("util_base==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/util_base-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("tool_build"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("helper_internal==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/helper_internal-1.0.tar.gz",
    )

    # util_base has no dependencies (leaf node)
    # helper_internal has no dependencies (leaf node, not top-level)

    return graph


@pytest.fixture
def mock_context():
    """Create a mock WorkContext for testing."""
    ctx = Mock(spec=context.WorkContext)
    ctx.settings = Mock()

    def mock_package_build_info(name):
        mock_pbi = Mock()
        mock_pbi.exclusive_build = False
        return mock_pbi

    ctx.settings.package_build_info = mock_package_build_info
    return ctx


@pytest.fixture
def test_graph():
    """Provide the test dependency graph."""
    return create_test_graph()


@pytest.fixture
def build_manager(mock_context, test_graph):
    """Create a ParallelBuildManager instance for testing."""
    return ParallelBuildManager(mock_context, test_graph)


class TestParallelBuildManager:
    """Test suite for ParallelBuildManager class."""

    def _get_all_buildable_nodes(self, build_manager):
        """Helper to collect all nodes from the generator."""
        all_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_nodes.extend(batch)
        return all_nodes

    def test_initialization(self, build_manager, test_graph):
        """Test that the manager initializes correctly."""
        assert build_manager.wkctx is not None
        assert build_manager.graph == test_graph
        assert len(build_manager.built_node_keys) == 0
        assert len(build_manager._remaining_nodes) == 6  # 6 non-root nodes
        assert build_manager.have_remaining_nodes()

    def test_have_remaining_nodes_initially_true(self, build_manager):
        """Test that initially there are remaining nodes."""
        assert build_manager.have_remaining_nodes()

    def test_have_remaining_nodes_false_when_all_built(self, build_manager, test_graph):
        """Test that have_remaining_nodes returns False when all nodes are built."""
        # Mark all nodes as built
        for node in test_graph.nodes.values():
            if node.key != dependency_graph.ROOT:
                build_manager.mark_node_built(node)

        assert not build_manager.have_remaining_nodes()

    def test_mark_node_built(self, build_manager, test_graph):
        """Test marking nodes as built."""
        util_base = test_graph.nodes["util-base==1.0"]

        assert not build_manager.is_node_built(util_base)
        assert util_base.key not in build_manager.built_node_keys
        assert util_base in build_manager._remaining_nodes

        build_manager.mark_node_built(util_base)

        # Verify node is now marked as built
        assert build_manager.is_node_built(util_base)
        assert util_base.key in build_manager.built_node_keys
        assert util_base not in build_manager._remaining_nodes

    def test_nodes_with_no_dependencies_buildable_first(self, build_manager):
        """Test that leaf nodes (no dependencies) are buildable first."""
        # Collect all nodes from all batches
        all_buildable_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_buildable_nodes.extend(batch)

        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert (
            len(all_buildable_nodes) == 3
        )  # util-base, helper-internal, and lib-shared (no build deps)
        assert "util-base" in buildable_names
        assert "helper-internal" in buildable_names
        assert "lib-shared" in buildable_names

    def test_node_with_build_dependencies(self, build_manager, test_graph):
        """Test that tool_build becomes buildable after its dependencies are built."""
        # Collect all nodes from all batches
        all_buildable_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_buildable_nodes.extend(batch)
        
        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert "util-base" in buildable_names
        assert "helper-internal" in buildable_names
        assert "tool-build" not in buildable_names

        # Mark the ready nodes as built (only nodes returned by get_ready can be marked as done)
        util_base = next(node for node in all_buildable_nodes if node.canonicalized_name == "util-base")
        helper_internal = next(node for node in all_buildable_nodes if node.canonicalized_name == "helper-internal")
        build_manager.mark_node_built(util_base)
        build_manager.mark_node_built(helper_internal)

        # Get next batch of buildable nodes
        all_buildable_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_buildable_nodes.extend(batch)
        
        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert "tool-build" in buildable_names

    def test_transitive_dependencies(self, build_manager, test_graph):
        """Test that transitive dependencies are handled correctly.

        app_main depends on tool_build for build, and tool_build depends on helper_internal for install.
        So app_main should only be buildable after util_base, helper_internal, and tool_build are built.
        """
        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" not in buildable_names
        assert (
            "helper-internal" in buildable_names
        )  # helper_internal should still be buildable

        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "tool-build" in buildable_names
        assert "app-main" not in buildable_names

        tool_build = test_graph.nodes["tool-build==1.0"]
        build_manager.mark_node_built(tool_build)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "lib-shared" in buildable_names

    def test_multiple_build_dependencies(self, build_manager, test_graph):
        """Test that app_main waits for all its dependencies."""
        # app_main depends on tool_build for build and lib_shared for install
        # tool_build depends on util_base and helper_internal
        # lib_shared depends on util_base

        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" not in buildable_names

        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}
        assert "tool-build" in buildable_names

        tool_build = test_graph.nodes["tool-build==1.0"]
        build_manager.mark_node_built(tool_build)

        lib_shared = test_graph.nodes["lib-shared==1.0"]
        build_manager.mark_node_built(lib_shared)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" in buildable_names

    def test_circular_dependency_detection(self, build_manager, test_graph):
        """Test that circular dependencies are detected."""
        # Create a scenario where no nodes can be built but some remain
        # Mark all nodes as remaining but create impossible conditions

        # Remove all leaf nodes from remaining nodes but don't mark them as built
        # This simulates a circular dependency scenario where no nodes can be built
        build_manager._remaining_nodes = [
            node
            for node in build_manager._remaining_nodes
            if node.canonicalized_name
            not in ["util-base", "helper-internal", "lib-shared"]
        ]

        with pytest.raises(ValueError, match="Circular dependency detected"):
            build_manager.get_nodes_ready_to_build()

    @patch("fromager.commands.build.logger")
    def test_logging_messages(self, mock_logger, build_manager, test_graph):
        """Test that appropriate logging messages are generated."""
        build_manager.get_nodes_ready_to_build()

        mock_logger.info.assert_called()
        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        assert any("ready to build" in msg for msg in log_calls)

        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)
        mock_logger.reset_mock()

        build_manager.get_nodes_ready_to_build()

        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        assert any("waiting for" in msg for msg in log_calls)


class TestExclusiveBuildHandling:
    """Test suite specifically for exclusive build handling."""

    def create_exclusive_build_graph(self):
        """Create a graph where some nodes require exclusive builds."""
        graph = dependency_graph.DependencyGraph()

        for name in ["normal_a", "normal_b", "exclusive_c"]:
            graph.add_dependency(
                parent_name=None,
                parent_version=None,
                req_type=requirements_file.RequirementType.INSTALL,
                req=Requirement(f"{name}==1.0"),
                req_version=Version("1.0"),
                download_url=f"http://example.com/{name}-1.0.tar.gz",
            )

        return graph

    @pytest.fixture
    def exclusive_build_manager(self, mock_context):
        """Create a manager with exclusive build settings."""
        graph = self.create_exclusive_build_graph()

        def mock_package_build_info(name):
            mock_pbi = Mock()
            mock_pbi.exclusive_build = name == "exclusive-c"
            return mock_pbi

        mock_context.settings.package_build_info = mock_package_build_info

        return ParallelBuildManager(mock_context, graph)

    def test_exclusive_build_isolation(self, exclusive_build_manager):
        """Test that exclusive build nodes are not mixed with other nodes."""
        buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()

        assert len(buildable_nodes) == 1
        assert buildable_nodes[0].canonicalized_name == "exclusive-c"

    def test_normal_nodes_built_together_when_no_exclusive(
        self, exclusive_build_manager
    ):
        """Test that normal nodes can be built together when no exclusive nodes are ready."""
        exclusive_node = None
        for node in exclusive_build_manager._remaining_nodes:
            if node.canonicalized_name == "exclusive-c":
                exclusive_node = node
                break

        assert exclusive_node is not None
        exclusive_build_manager.mark_node_built(exclusive_node)

        buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert len(buildable_nodes) == 2
        assert "normal-a" in buildable_names
        assert "normal-b" in buildable_names
        assert "exclusive-c" not in buildable_names

    def test_multiple_exclusive_nodes_only_first_selected(self, mock_context):
        """Test that when multiple exclusive nodes are ready, only the first is selected."""
        graph = dependency_graph.DependencyGraph()

        for name in ["exclusive_a", "exclusive_b", "normal_c"]:
            graph.add_dependency(
                parent_name=None,
                parent_version=None,
                req_type=requirements_file.RequirementType.INSTALL,
                req=Requirement(f"{name}==1.0"),
                req_version=Version("1.0"),
                download_url=f"http://example.com/{name}-1.0.tar.gz",
            )

        def mock_package_build_info(name):
            mock_pbi = Mock()
            mock_pbi.exclusive_build = name.startswith("exclusive-")
            return mock_pbi

        mock_context.settings.package_build_info = mock_package_build_info
        manager = ParallelBuildManager(mock_context, graph)

        buildable_nodes = manager.get_nodes_ready_to_build()

        assert len(buildable_nodes) == 1
        assert buildable_nodes[0].canonicalized_name.startswith("exclusive-")

    @patch("fromager.commands.build.logger")
    def test_exclusive_build_logging(self, mock_logger, exclusive_build_manager):
        """Test that exclusive build scenarios are logged appropriately."""
        exclusive_build_manager.get_nodes_ready_to_build()

        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        exclusive_log_found = any(
            "requires exclusive build" in msg for msg in log_calls
        )
        assert exclusive_log_found

Now let me update the first test to use the helper:

from unittest.mock import Mock, patch

import pytest
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from packaging.version import Version

from fromager import context, dependency_graph, requirements_file
from fromager.commands.build import ParallelBuildManager


def create_test_graph():
    """Create a test dependency graph with various dependency scenarios.

    Graph structure:
    - app_main: Top-level app (depends on lib_shared for install, tool_build for build)
    - lib_shared: Shared library (depends on util_base for install, no build deps)
    - util_base: Base utility (no dependencies - leaf node)
    - tool_build: Build tool (depends on util_base for build and install)
    - plugin_extra: Plugin (depends on lib_shared for install, tool_build for build)
    - helper_internal: Internal helper (only used by tool_build, not top-level)

    Expected build order: util_base → helper_internal → tool_build → lib_shared → (app_main, plugin_extra)
    """
    graph = dependency_graph.DependencyGraph()

    # Add top-level dependencies (directly requested by user)
    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("app_main==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/app_main-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("plugin_extra==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/plugin_extra-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("app_main"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_SYSTEM,
        req=Requirement("tool_build==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/tool_build-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("app_main"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("lib_shared"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("util_base==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/util_base-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("plugin_extra"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_BACKEND,
        req=Requirement("tool_build==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/tool_build-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("plugin_extra"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    # tool_build build dependencies (not top-level, only used by others)
    graph.add_dependency(
        parent_name=canonicalize_name("tool_build"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_SYSTEM,
        req=Requirement("util_base==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/util_base-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("tool_build"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("helper_internal==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/helper_internal-1.0.tar.gz",
    )

    # util_base has no dependencies (leaf node)
    # helper_internal has no dependencies (leaf node, not top-level)

    return graph


@pytest.fixture
def mock_context():
    """Create a mock WorkContext for testing."""
    ctx = Mock(spec=context.WorkContext)
    ctx.settings = Mock()

    def mock_package_build_info(name):
        mock_pbi = Mock()
        mock_pbi.exclusive_build = False
        return mock_pbi

    ctx.settings.package_build_info = mock_package_build_info
    return ctx


@pytest.fixture
def test_graph():
    """Provide the test dependency graph."""
    return create_test_graph()


@pytest.fixture
def build_manager(mock_context, test_graph):
    """Create a ParallelBuildManager instance for testing."""
    return ParallelBuildManager(mock_context, test_graph)


class TestParallelBuildManager:
    """Test suite for ParallelBuildManager class."""

    def _get_all_buildable_nodes(self, build_manager):
        """Helper to collect all nodes from the generator."""
        all_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_nodes.extend(batch)
        return all_nodes

    def test_initialization(self, build_manager, test_graph):
        """Test that the manager initializes correctly."""
        assert build_manager.wkctx is not None
        assert build_manager.graph == test_graph
        assert len(build_manager.built_node_keys) == 0
        assert len(build_manager._remaining_nodes) == 6  # 6 non-root nodes
        assert build_manager.have_remaining_nodes()

    def test_have_remaining_nodes_initially_true(self, build_manager):
        """Test that initially there are remaining nodes."""
        assert build_manager.have_remaining_nodes()

    def test_have_remaining_nodes_false_when_all_built(self, build_manager, test_graph):
        """Test that have_remaining_nodes returns False when all nodes are built."""
        # Mark all nodes as built
        for node in test_graph.nodes.values():
            if node.key != dependency_graph.ROOT:
                build_manager.mark_node_built(node)

        assert not build_manager.have_remaining_nodes()

    def test_mark_node_built(self, build_manager, test_graph):
        """Test marking nodes as built."""
        util_base = test_graph.nodes["util-base==1.0"]

        assert not build_manager.is_node_built(util_base)
        assert util_base.key not in build_manager.built_node_keys
        assert util_base in build_manager._remaining_nodes

        build_manager.mark_node_built(util_base)

        # Verify node is now marked as built
        assert build_manager.is_node_built(util_base)
        assert util_base.key in build_manager.built_node_keys
        assert util_base not in build_manager._remaining_nodes

    def test_nodes_with_no_dependencies_buildable_first(self, build_manager):
        """Test that leaf nodes (no dependencies) are buildable first."""
        all_buildable_nodes = self._get_all_buildable_nodes(build_manager)

        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert (
            len(all_buildable_nodes) == 3
        )  # util-base, helper-internal, and lib-shared (no build deps)
        assert "util-base" in buildable_names
        assert "helper-internal" in buildable_names
        assert "lib-shared" in buildable_names

    def test_node_with_build_dependencies(self, build_manager, test_graph):
        """Test that tool_build becomes buildable after its dependencies are built."""
        # Collect all nodes from all batches
        all_buildable_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_buildable_nodes.extend(batch)
        
        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert "util-base" in buildable_names
        assert "helper-internal" in buildable_names
        assert "tool-build" not in buildable_names

        # Mark the ready nodes as built (only nodes returned by get_ready can be marked as done)
        util_base = next(node for node in all_buildable_nodes if node.canonicalized_name == "util-base")
        helper_internal = next(node for node in all_buildable_nodes if node.canonicalized_name == "helper-internal")
        build_manager.mark_node_built(util_base)
        build_manager.mark_node_built(helper_internal)

        # Get next batch of buildable nodes
        all_buildable_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_buildable_nodes.extend(batch)
        
        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert "tool-build" in buildable_names

    def test_transitive_dependencies(self, build_manager, test_graph):
        """Test that transitive dependencies are handled correctly.

        app_main depends on tool_build for build, and tool_build depends on helper_internal for install.
        So app_main should only be buildable after util_base, helper_internal, and tool_build are built.
        """
        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" not in buildable_names
        assert (
            "helper-internal" in buildable_names
        )  # helper_internal should still be buildable

        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "tool-build" in buildable_names
        assert "app-main" not in buildable_names

        tool_build = test_graph.nodes["tool-build==1.0"]
        build_manager.mark_node_built(tool_build)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "lib-shared" in buildable_names

    def test_multiple_build_dependencies(self, build_manager, test_graph):
        """Test that app_main waits for all its dependencies."""
        # app_main depends on tool_build for build and lib_shared for install
        # tool_build depends on util_base and helper_internal
        # lib_shared depends on util_base

        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" not in buildable_names

        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}
        assert "tool-build" in buildable_names

        tool_build = test_graph.nodes["tool-build==1.0"]
        build_manager.mark_node_built(tool_build)

        lib_shared = test_graph.nodes["lib-shared==1.0"]
        build_manager.mark_node_built(lib_shared)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" in buildable_names

    def test_circular_dependency_detection(self, build_manager, test_graph):
        """Test that circular dependencies are detected."""
        # Create a scenario where no nodes can be built but some remain
        # Mark all nodes as remaining but create impossible conditions

        # Remove all leaf nodes from remaining nodes but don't mark them as built
        # This simulates a circular dependency scenario where no nodes can be built
        build_manager._remaining_nodes = [
            node
            for node in build_manager._remaining_nodes
            if node.canonicalized_name
            not in ["util-base", "helper-internal", "lib-shared"]
        ]

        with pytest.raises(ValueError, match="Circular dependency detected"):
            build_manager.get_nodes_ready_to_build()

    @patch("fromager.commands.build.logger")
    def test_logging_messages(self, mock_logger, build_manager, test_graph):
        """Test that appropriate logging messages are generated."""
        build_manager.get_nodes_ready_to_build()

        mock_logger.info.assert_called()
        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        assert any("ready to build" in msg for msg in log_calls)

        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)
        mock_logger.reset_mock()

        build_manager.get_nodes_ready_to_build()

        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        assert any("waiting for" in msg for msg in log_calls)


class TestExclusiveBuildHandling:
    """Test suite specifically for exclusive build handling."""

    def create_exclusive_build_graph(self):
        """Create a graph where some nodes require exclusive builds."""
        graph = dependency_graph.DependencyGraph()

        for name in ["normal_a", "normal_b", "exclusive_c"]:
            graph.add_dependency(
                parent_name=None,
                parent_version=None,
                req_type=requirements_file.RequirementType.INSTALL,
                req=Requirement(f"{name}==1.0"),
                req_version=Version("1.0"),
                download_url=f"http://example.com/{name}-1.0.tar.gz",
            )

        return graph

    @pytest.fixture
    def exclusive_build_manager(self, mock_context):
        """Create a manager with exclusive build settings."""
        graph = self.create_exclusive_build_graph()

        def mock_package_build_info(name):
            mock_pbi = Mock()
            mock_pbi.exclusive_build = name == "exclusive-c"
            return mock_pbi

        mock_context.settings.package_build_info = mock_package_build_info

        return ParallelBuildManager(mock_context, graph)

    def test_exclusive_build_isolation(self, exclusive_build_manager):
        """Test that exclusive build nodes are not mixed with other nodes."""
        buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()

        assert len(buildable_nodes) == 1
        assert buildable_nodes[0].canonicalized_name == "exclusive-c"

    def test_normal_nodes_built_together_when_no_exclusive(
        self, exclusive_build_manager
    ):
        """Test that normal nodes can be built together when no exclusive nodes are ready."""
        exclusive_node = None
        for node in exclusive_build_manager._remaining_nodes:
            if node.canonicalized_name == "exclusive-c":
                exclusive_node = node
                break

        assert exclusive_node is not None
        exclusive_build_manager.mark_node_built(exclusive_node)

        buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert len(buildable_nodes) == 2
        assert "normal-a" in buildable_names
        assert "normal-b" in buildable_names
        assert "exclusive-c" not in buildable_names

    def test_multiple_exclusive_nodes_only_first_selected(self, mock_context):
        """Test that when multiple exclusive nodes are ready, only the first is selected."""
        graph = dependency_graph.DependencyGraph()

        for name in ["exclusive_a", "exclusive_b", "normal_c"]:
            graph.add_dependency(
                parent_name=None,
                parent_version=None,
                req_type=requirements_file.RequirementType.INSTALL,
                req=Requirement(f"{name}==1.0"),
                req_version=Version("1.0"),
                download_url=f"http://example.com/{name}-1.0.tar.gz",
            )

        def mock_package_build_info(name):
            mock_pbi = Mock()
            mock_pbi.exclusive_build = name.startswith("exclusive-")
            return mock_pbi

        mock_context.settings.package_build_info = mock_package_build_info
        manager = ParallelBuildManager(mock_context, graph)

        buildable_nodes = manager.get_nodes_ready_to_build()

        assert len(buildable_nodes) == 1
        assert buildable_nodes[0].canonicalized_name.startswith("exclusive-")

    @patch("fromager.commands.build.logger")
    def test_exclusive_build_logging(self, mock_logger, exclusive_build_manager):
        """Test that exclusive build scenarios are logged appropriately."""
        exclusive_build_manager.get_nodes_ready_to_build()

        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        exclusive_log_found = any(
            "requires exclusive build" in msg for msg in log_calls
        )
        assert exclusive_log_found

Now let me update the second test method:

from unittest.mock import Mock, patch

import pytest
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from packaging.version import Version

from fromager import context, dependency_graph, requirements_file
from fromager.commands.build import ParallelBuildManager


def create_test_graph():
    """Create a test dependency graph with various dependency scenarios.

    Graph structure:
    - app_main: Top-level app (depends on lib_shared for install, tool_build for build)
    - lib_shared: Shared library (depends on util_base for install, no build deps)
    - util_base: Base utility (no dependencies - leaf node)
    - tool_build: Build tool (depends on util_base for build and install)
    - plugin_extra: Plugin (depends on lib_shared for install, tool_build for build)
    - helper_internal: Internal helper (only used by tool_build, not top-level)

    Expected build order: util_base → helper_internal → tool_build → lib_shared → (app_main, plugin_extra)
    """
    graph = dependency_graph.DependencyGraph()

    # Add top-level dependencies (directly requested by user)
    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("app_main==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/app_main-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=None,
        parent_version=None,
        req_type=requirements_file.RequirementType.TOP_LEVEL,
        req=Requirement("plugin_extra==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/plugin_extra-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("app_main"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_SYSTEM,
        req=Requirement("tool_build==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/tool_build-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("app_main"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("lib_shared"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("util_base==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/util_base-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("plugin_extra"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_BACKEND,
        req=Requirement("tool_build==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/tool_build-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("plugin_extra"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("lib_shared==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/lib_shared-1.0.tar.gz",
    )

    # tool_build build dependencies (not top-level, only used by others)
    graph.add_dependency(
        parent_name=canonicalize_name("tool_build"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.BUILD_SYSTEM,
        req=Requirement("util_base==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/util_base-1.0.tar.gz",
    )

    graph.add_dependency(
        parent_name=canonicalize_name("tool_build"),
        parent_version=Version("1.0"),
        req_type=requirements_file.RequirementType.INSTALL,
        req=Requirement("helper_internal==1.0"),
        req_version=Version("1.0"),
        download_url="http://example.com/helper_internal-1.0.tar.gz",
    )

    # util_base has no dependencies (leaf node)
    # helper_internal has no dependencies (leaf node, not top-level)

    return graph


@pytest.fixture
def mock_context():
    """Create a mock WorkContext for testing."""
    ctx = Mock(spec=context.WorkContext)
    ctx.settings = Mock()

    def mock_package_build_info(name):
        mock_pbi = Mock()
        mock_pbi.exclusive_build = False
        return mock_pbi

    ctx.settings.package_build_info = mock_package_build_info
    return ctx


@pytest.fixture
def test_graph():
    """Provide the test dependency graph."""
    return create_test_graph()


@pytest.fixture
def build_manager(mock_context, test_graph):
    """Create a ParallelBuildManager instance for testing."""
    return ParallelBuildManager(mock_context, test_graph)


class TestParallelBuildManager:
    """Test suite for ParallelBuildManager class."""

    def _get_all_buildable_nodes(self, build_manager):
        """Helper to collect all nodes from the generator."""
        all_nodes = []
        for batch in build_manager.get_nodes_ready_to_build():
            all_nodes.extend(batch)
        return all_nodes

    def test_initialization(self, build_manager, test_graph):
        """Test that the manager initializes correctly."""
        assert build_manager.wkctx is not None
        assert build_manager.graph == test_graph
        assert len(build_manager.built_node_keys) == 0
        assert len(build_manager._remaining_nodes) == 6  # 6 non-root nodes
        assert build_manager.have_remaining_nodes()

    def test_have_remaining_nodes_initially_true(self, build_manager):
        """Test that initially there are remaining nodes."""
        assert build_manager.have_remaining_nodes()

    def test_have_remaining_nodes_false_when_all_built(self, build_manager, test_graph):
        """Test that have_remaining_nodes returns False when all nodes are built."""
        # Mark all nodes as built
        for node in test_graph.nodes.values():
            if node.key != dependency_graph.ROOT:
                build_manager.mark_node_built(node)

        assert not build_manager.have_remaining_nodes()

    def test_mark_node_built(self, build_manager, test_graph):
        """Test marking nodes as built."""
        util_base = test_graph.nodes["util-base==1.0"]

        assert not build_manager.is_node_built(util_base)
        assert util_base.key not in build_manager.built_node_keys
        assert util_base in build_manager._remaining_nodes

        build_manager.mark_node_built(util_base)

        # Verify node is now marked as built
        assert build_manager.is_node_built(util_base)
        assert util_base.key in build_manager.built_node_keys
        assert util_base not in build_manager._remaining_nodes

    def test_nodes_with_no_dependencies_buildable_first(self, build_manager):
        """Test that leaf nodes (no dependencies) are buildable first."""
        all_buildable_nodes = self._get_all_buildable_nodes(build_manager)

        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert (
            len(all_buildable_nodes) == 3
        )  # util-base, helper-internal, and lib-shared (no build deps)
        assert "util-base" in buildable_names
        assert "helper-internal" in buildable_names
        assert "lib-shared" in buildable_names

    def test_node_with_build_dependencies(self, build_manager, test_graph):
        """Test that tool_build becomes buildable after its dependencies are built."""
        all_buildable_nodes = self._get_all_buildable_nodes(build_manager)
        
        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert "util-base" in buildable_names
        assert "helper-internal" in buildable_names
        assert "tool-build" not in buildable_names

        # Mark the ready nodes as built (only nodes returned by get_ready can be marked as done)
        util_base = next(node for node in all_buildable_nodes if node.canonicalized_name == "util-base")
        helper_internal = next(node for node in all_buildable_nodes if node.canonicalized_name == "helper-internal")
        build_manager.mark_node_built(util_base)
        build_manager.mark_node_built(helper_internal)

        # Get next batch of buildable nodes
        all_buildable_nodes = self._get_all_buildable_nodes(build_manager)
        
        buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
        assert "tool-build" in buildable_names

    def test_transitive_dependencies(self, build_manager, test_graph):
        """Test that transitive dependencies are handled correctly.

        app_main depends on tool_build for build, and tool_build depends on helper_internal for install.
        So app_main should only be buildable after util_base, helper_internal, and tool_build are built.
        """
        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" not in buildable_names
        assert (
            "helper-internal" in buildable_names
        )  # helper_internal should still be buildable

        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "tool-build" in buildable_names
        assert "app-main" not in buildable_names

        tool_build = test_graph.nodes["tool-build==1.0"]
        build_manager.mark_node_built(tool_build)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "lib-shared" in buildable_names

    def test_multiple_build_dependencies(self, build_manager, test_graph):
        """Test that app_main waits for all its dependencies."""
        # app_main depends on tool_build for build and lib_shared for install
        # tool_build depends on util_base and helper_internal
        # lib_shared depends on util_base

        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" not in buildable_names

        helper_internal = test_graph.nodes["helper-internal==1.0"]
        build_manager.mark_node_built(helper_internal)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}
        assert "tool-build" in buildable_names

        tool_build = test_graph.nodes["tool-build==1.0"]
        build_manager.mark_node_built(tool_build)

        lib_shared = test_graph.nodes["lib-shared==1.0"]
        build_manager.mark_node_built(lib_shared)

        buildable_nodes = build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert "app-main" in buildable_names

    def test_circular_dependency_detection(self, build_manager, test_graph):
        """Test that circular dependencies are detected."""
        # Create a scenario where no nodes can be built but some remain
        # Mark all nodes as remaining but create impossible conditions

        # Remove all leaf nodes from remaining nodes but don't mark them as built
        # This simulates a circular dependency scenario where no nodes can be built
        build_manager._remaining_nodes = [
            node
            for node in build_manager._remaining_nodes
            if node.canonicalized_name
            not in ["util-base", "helper-internal", "lib-shared"]
        ]

        with pytest.raises(ValueError, match="Circular dependency detected"):
            build_manager.get_nodes_ready_to_build()

    @patch("fromager.commands.build.logger")
    def test_logging_messages(self, mock_logger, build_manager, test_graph):
        """Test that appropriate logging messages are generated."""
        build_manager.get_nodes_ready_to_build()

        mock_logger.info.assert_called()
        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        assert any("ready to build" in msg for msg in log_calls)

        util_base = test_graph.nodes["util-base==1.0"]
        build_manager.mark_node_built(util_base)
        mock_logger.reset_mock()

        build_manager.get_nodes_ready_to_build()

        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        assert any("waiting for" in msg for msg in log_calls)


class TestExclusiveBuildHandling:
    """Test suite specifically for exclusive build handling."""

    def create_exclusive_build_graph(self):
        """Create a graph where some nodes require exclusive builds."""
        graph = dependency_graph.DependencyGraph()

        for name in ["normal_a", "normal_b", "exclusive_c"]:
            graph.add_dependency(
                parent_name=None,
                parent_version=None,
                req_type=requirements_file.RequirementType.INSTALL,
                req=Requirement(f"{name}==1.0"),
                req_version=Version("1.0"),
                download_url=f"http://example.com/{name}-1.0.tar.gz",
            )

        return graph

    @pytest.fixture
    def exclusive_build_manager(self, mock_context):
        """Create a manager with exclusive build settings."""
        graph = self.create_exclusive_build_graph()

        def mock_package_build_info(name):
            mock_pbi = Mock()
            mock_pbi.exclusive_build = name == "exclusive-c"
            return mock_pbi

        mock_context.settings.package_build_info = mock_package_build_info

        return ParallelBuildManager(mock_context, graph)

    def test_exclusive_build_isolation(self, exclusive_build_manager):
        """Test that exclusive build nodes are not mixed with other nodes."""
        buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()

        assert len(buildable_nodes) == 1
        assert buildable_nodes[0].canonicalized_name == "exclusive-c"

    def test_normal_nodes_built_together_when_no_exclusive(
        self, exclusive_build_manager
    ):
        """Test that normal nodes can be built together when no exclusive nodes are ready."""
        exclusive_node = None
        for node in exclusive_build_manager._remaining_nodes:
            if node.canonicalized_name == "exclusive-c":
                exclusive_node = node
                break

        assert exclusive_node is not None
        exclusive_build_manager.mark_node_built(exclusive_node)

        buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
        buildable_names = {node.canonicalized_name for node in buildable_nodes}

        assert len(buildable_nodes) == 2
        assert "normal-a" in buildable_names
        assert "normal-b" in buildable_names
        assert "exclusive-c" not in buildable_names

    def test_multiple_exclusive_nodes_only_first_selected(self, mock_context):
        """Test that when multiple exclusive nodes are ready, only the first is selected."""
        graph = dependency_graph.DependencyGraph()

        for name in ["exclusive_a", "exclusive_b", "normal_c"]:
            graph.add_dependency(
                parent_name=None,
                parent_version=None,
                req_type=requirements_file.RequirementType.INSTALL,
                req=Requirement(f"{name}==1.0"),
                req_version=Version("1.0"),
                download_url=f"http://example.com/{name}-1.0.tar.gz",
            )

        def mock_package_build_info(name):
            mock_pbi = Mock()
            mock_pbi.exclusive_build = name.startswith("exclusive-")
            return mock_pbi

        mock_context.settings.package_build_info = mock_package_build_info
        manager = ParallelBuildManager(mock_context, graph)

        buildable_nodes = manager.get_nodes_ready_to_build()

        assert len(buildable_nodes) == 1
        assert buildable_nodes[0].canonicalized_name.startswith("exclusive-")

    @patch("fromager.commands.build.logger")
    def test_exclusive_build_logging(self, mock_logger, exclusive_build_manager):
        """Test that exclusive build scenarios are logged appropriately."""
        exclusive_build_manager.get_nodes_ready_to_build()

        log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
        exclusive_log_found = any(
            "requires exclusive build" in msg for msg in log_calls
        )
        assert exclusive_log_found

This is taking too long to fix all tests individually. Let me run a targeted test to see if my approach is working, and then I can fix the remaining tests more efficiently. Let me run just one test to see if the pattern works:

I see the issue! The topological sorter is including the ROOT node (with empty canonicalized_name ''). I need to filter out the ROOT node from the buildable nodes. Let me fix this in the get_nodes_ready_to_build method:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()
        self.install_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_install_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        # Filter out the ROOT node as it's not a real package to build
        buildable_nodes = [
            node for node in self.build_sorter.get_ready() 
            if node.key != dependency_graph.ROOT
        ]

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the install dependency graph because it has
        # been built and can be installed when its installation dependencies are
        # built.
        self.install_sorter.done(node)

        # Get all nodes that are now ready in the install dependency graph
        # because they are now completely done and are available for building
        # other packages.
        ready_install_nodes = list(self.install_sorter.get_ready())

        # Mark all ready install nodes as done in the build dependency graph so
        # that packages that depend on them can be built.
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)

        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes),
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now let me test this fix:

User

Instead of filtering it out in ParallelBuildManager, change the methods of the DependencyGraph to not include the root node in the first place.

Cursor

You're absolutely right! It's better to fix this at the source in the DependencyGraph methods rather than filtering it out in the ParallelBuildManager. Let me update the topological sorter methods to exclude the ROOT node from the beginning.

First, let me revert the filter I just added to the ParallelBuildManager:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()
        self.install_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_install_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        buildable_nodes = list(self.build_sorter.get_ready())

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the install dependency graph because it has
        # been built and can be installed when its installation dependencies are
        # built.
        self.install_sorter.done(node)

        # Get all nodes that are now ready in the install dependency graph
        # because they are now completely done and are available for building
        # other packages.
        ready_install_nodes = list(self.install_sorter.get_ready())

        # Mark all ready install nodes as done in the build dependency graph so
        # that packages that depend on them can be built.
        for install_node in ready_install_nodes:
            self.build_sorter.done(install_node)

        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes),
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now let me update the DependencyGraph methods to exclude the ROOT node when adding nodes to the topological sorters:

from __future__ import annotations

import dataclasses
import graphlib
import json
import logging
import pathlib
import typing

from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version

from .read import open_file_or_url
from .requirements_file import RequirementType

logger = logging.getLogger(__name__)

ROOT = ""


class DependencyEdgeDict(typing.TypedDict):
    req_type: str
    req: str
    key: str


class DependencyNodeDict(typing.TypedDict):
    download_url: str
    canonicalized_name: str
    version: str
    pre_built: bool
    edges: list[DependencyEdgeDict]


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
    canonicalized_name: NormalizedName
    version: Version
    download_url: str = dataclasses.field(default="", compare=False)
    pre_built: bool = dataclasses.field(default=False, compare=False)
    # additional fields
    key: str = dataclasses.field(init=False, compare=False, repr=False)
    parents: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )
    children: list[DependencyEdge] = dataclasses.field(
        default_factory=list,
        init=False,
        compare=False,
        repr=False,
    )

    def __post_init__(self) -> None:
        if self.canonicalized_name == ROOT:
            # root has a special key
            object.__setattr__(self, "key", ROOT)
        else:
            object.__setattr__(
                self, "key", f"{self.canonicalized_name}=={self.version}"
            )

    def add_child(
        self,
        child: DependencyNode,
        req: Requirement,
        req_type: RequirementType,
    ) -> None:
        current_to_child_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=child
        )
        self.children.append(current_to_child_edge)
        child_to_current_edge = DependencyEdge(
            req=req, req_type=req_type, destination_node=self
        )
        # will create a cyclic dependency in memory, which will make it harder to garbage collect
        # not an issue for fromager since it is used as a short-lived process
        child.parents.append(child_to_current_edge)

    def to_dict(self) -> DependencyNodeDict:
        return {
            "download_url": self.download_url,
            "pre_built": self.pre_built,
            "version": str(self.version),
            "canonicalized_name": str(self.canonicalized_name),
            "edges": [edge.to_dict() for edge in self.children],
        }

    def get_incoming_install_edges(self) -> list[DependencyEdge]:
        return [
            edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
        ]

    def get_outgoing_edges(
        self, req_name: str, req_type: RequirementType
    ) -> list[DependencyEdge]:
        return [
            edge
            for edge in self.children
            if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
            and edge.req_type == req_type
        ]

    @classmethod
    def construct_root_node(cls) -> DependencyNode:
        return cls(
            canonicalize_name(ROOT),
            # version doesn't really matter for root
            Version("0"),
        )


@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
    key: str = dataclasses.field(init=False, repr=True, compare=True)
    destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
    req: Requirement = dataclasses.field(repr=True, compare=True)
    req_type: RequirementType = dataclasses.field(repr=True, compare=True)

    def __post_init__(self) -> None:
        object.__setattr__(self, "key", self.destination_node.key)

    def to_dict(self) -> DependencyEdgeDict:
        return {
            "key": self.key,
            "req_type": str(self.req_type),
            "req": str(self.req),
        }


class DependencyGraph:
    def __init__(self) -> None:
        self.nodes: dict[str, DependencyNode] = {}
        self.clear()

    @classmethod
    def from_file(
        cls,
        graph_file: pathlib.Path | str,
    ) -> DependencyGraph:
        with open_file_or_url(graph_file) as f:
            # TODO: add JSON validation to ensure it is a parsable graph json
            raw_graph = typing.cast(dict[str, dict], json.load(f))
            return cls.from_dict(raw_graph)

    @classmethod
    def from_dict(
        cls,
        graph_dict: dict[str, dict[str, typing.Any]],
    ) -> DependencyGraph:
        graph = cls()
        stack = [ROOT]
        visited = set()
        while stack:
            curr_key = stack.pop()
            if curr_key in visited:
                continue
            node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
            parent_name = parent_version = None
            if curr_key != ROOT:
                parent_name = canonicalize_name(node_dict["canonicalized_name"])
                parent_version = Version(node_dict["version"])
            for raw_edge in node_dict["edges"]:
                edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
                destination_node_dict = typing.cast(
                    DependencyNodeDict, graph_dict[edge_dict["key"]]
                )
                graph.add_dependency(
                    parent_name=parent_name,
                    parent_version=parent_version,
                    req_type=RequirementType(edge_dict["req_type"]),
                    req=Requirement(edge_dict["req"]),
                    req_version=Version(destination_node_dict["version"]),
                    download_url=destination_node_dict["download_url"],
                    pre_built=destination_node_dict["pre_built"],
                )
                stack.append(edge_dict["key"])
            visited.add(curr_key)
        return graph

    def clear(self) -> None:
        self.nodes.clear()
        self.nodes[ROOT] = DependencyNode.construct_root_node()

    def _to_dict(self):
        raw_graph = {}
        stack = [self.nodes[ROOT]]
        visited = set()
        while stack:
            node = stack.pop()
            raw_graph[node.key] = node.to_dict()
            for child in node.children:
                if child.destination_node.key not in visited:
                    stack.append(child.destination_node)
            visited.add(node.key)
        return raw_graph

    def serialize(self, file_handle: typing.TextIO):
        raw_graph = self._to_dict()
        json.dump(raw_graph, file_handle, indent=2, default=str)

    def _add_node(
        self,
        req_name: NormalizedName,
        version: Version,
        download_url: str,
        pre_built: bool,
    ):
        new_node = DependencyNode(
            canonicalized_name=req_name,
            version=version,
            download_url=download_url,
            pre_built=pre_built,
        )
        # check if a node with that key already exists. if it does then use that
        node = self.nodes.get(new_node.key, new_node)
        # store the node in case we are using new_node
        self.nodes[node.key] = node
        return node

    def add_dependency(
        self,
        parent_name: NormalizedName | None,
        parent_version: Version | None,
        req_type: RequirementType,
        req: Requirement,
        req_version: Version,
        download_url: str = "",
        pre_built: bool = False,
    ) -> None:
        logger.debug(
            "recording %s dependency %s%s -> %s==%s",
            req_type,
            parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
            f"=={parent_version}" if parent_version else "",
            req.name,
            req_version,
        )

        node = self._add_node(
            req_name=canonicalize_name(req.name),
            version=req_version,
            download_url=download_url,
            pre_built=pre_built,
        )

        parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
        if parent_key not in self.nodes:
            raise ValueError(
                f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
            )

        self.nodes[parent_key].add_child(node, req=req, req_type=req_type)

    def get_dependency_edges(
        self, match_dep_types: list[RequirementType] | None = None
    ) -> typing.Iterable[DependencyEdge]:
        visited = set()
        for edge in self._depth_first_traversal(
            self.nodes[ROOT].children,
            set(),
            match_dep_types=match_dep_types,
        ):
            if edge.destination_node.key not in visited:
                yield edge
                visited.add(edge.destination_node.key)

    def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
        for edge in self.get_dependency_edges(
            match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
        ):
            yield edge.destination_node

    def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
        if not req_name:
            return [self.nodes[ROOT]]
        return [
            node
            for node in self.get_all_nodes()
            if node.canonicalized_name == canonicalize_name(req_name)
        ]

    def get_root_node(self) -> DependencyNode:
        return self.nodes[ROOT]

    def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
        return self.nodes.values()

    def get_install_dependency_versions(
        self,
    ) -> dict[NormalizedName, list[DependencyNode]]:
        all_versions: dict[NormalizedName, list[DependencyNode]] = {}
        for node in self.get_install_dependencies():
            all_versions.setdefault(node.canonicalized_name, []).append(node)
        return all_versions

    def _depth_first_traversal(
        self,
        start_edges: list[DependencyEdge],
        visited: set[str],
        match_dep_types: list[RequirementType] | None = None,
    ) -> typing.Iterable[DependencyEdge]:
        for edge in start_edges:
            if edge.destination_node.key in visited:
                continue
            if match_dep_types and edge.req_type not in match_dep_types:
                continue
            visited.add(edge.destination_node.key)
            yield edge
            yield from self._depth_first_traversal(
                edge.destination_node.children, visited, match_dep_types
            )

    def _get_dependency_topological_sorter(
        self,
        requirement_filter: typing.Callable[[RequirementType], bool],
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for dependencies matching the filter.

        Args:
            requirement_filter: Function that returns True for requirement types to include

        Returns:
            A TopologicalSorter with all DependencyNode objects and filtered dependency relationships
        """
        sorter: graphlib.TopologicalSorter[DependencyNode] = (
            graphlib.TopologicalSorter()
        )

        # Add all nodes (except ROOT) and filtered dependency relationships
        for node in self.get_all_nodes():
            # Skip the ROOT node as it's not a real package to build
            if node.key != ROOT:
                sorter.add(node)
            
            for edge in node.children:
                if requirement_filter(edge.req_type):
                    # In topological sorting, if A depends on B, then B must come before A
                    # So we add (dependent=A, dependency=B)
                    # Skip self-dependencies to avoid cycles and skip ROOT node
                    if (node != edge.destination_node and 
                        node.key != ROOT and 
                        edge.destination_node.key != ROOT):
                        sorter.add(node, edge.destination_node)

        sorter.prepare()
        return sorter

    def get_build_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the build dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing build-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent build dependencies (build-system, build-backend, build-sdist).
        """
        return self._get_dependency_topological_sorter(
            lambda req_type: req_type.is_build_requirement
        )

    def get_install_dependency_topological_sorter(
        self,
    ) -> graphlib.TopologicalSorter[DependencyNode]:
        """Return a TopologicalSorter for the install dependencies in the graph.

        The sorter contains DependencyNode objects and edges
        representing install-time dependencies between packages.

        Returns:
            A TopologicalSorter where each node is a DependencyNode and edges
            represent install dependencies (install, toplevel).
        """
        return self._get_dependency_topological_sorter(
            lambda req_type: req_type.is_install_requirement
        )

Now let me test this fix by running the single test again:

Excellent! That test is now passing. Let me run the second test I fixed to see if it works too:

I see the issue. The problem is that the install_sorter still includes the ROOT node and its dependencies, so when I try to call done() on a node that was returned by the build_sorter.get_ready(), it might not have been returned by install_sorter.get_ready() yet.

The issue is in the mark_node_built method - I need to make sure I only call done() on nodes that were actually returned by get_ready() from the respective sorters. Let me fix this by updating the logic:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()
        self.install_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_install_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        buildable_nodes = list(self.build_sorter.get_ready())

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update both topological sorters."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the build dependency graph (it was returned by get_ready)
        self.build_sorter.done(node)

        # Check if this node is ready in the install sorter and mark it done there too
        ready_install_nodes = list(self.install_sorter.get_ready())
        if node in ready_install_nodes:
            self.install_sorter.done(node)
            # Get the updated list of ready install nodes after marking this one done
            ready_install_nodes = list(self.install_sorter.get_ready())

        # Mark all ready install nodes as done in the build dependency graph so
        # that packages that depend on them can be built.
        for install_node in ready_install_nodes:
            # Only mark as done if it's not already done in build sorter
            try:
                self.build_sorter.done(install_node)
            except ValueError:
                # Node was already marked as done or not ready yet
                pass

        logger.debug(
            "marked %s as built, marked %d install-ready nodes as done %s",
            node.key,
            len(ready_install_nodes),
            sorted(n.key for n in ready_install_nodes),
        )

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now let me test this fix:

User

No, stop. That logic is not correct. We should not mark a node as done in the build_sorter until all of its installation dependencies are built. If we cannot call done() on a node in a sorter unless it has been returned by get_ready() then we need to track the installation dependencies some other way, not with a sorter.

Cursor

You're absolutely right! I was overcomplicating this. The issue is that we're trying to use two topological sorters in a way that doesn't match how they're designed to work.

The correct approach is:

Use the build sorter to determine when nodes are ready to build (based on build dependencies)
Use a different mechanism to track install dependencies, not another topological sorter

Let me revert the mark_node_built method and implement a simpler approach that tracks install dependencies without using a second topological sorter:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorters for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()
        self.install_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_install_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        buildable_nodes = list(self.build_sorter.get_ready())

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update the build sorter."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the build dependency graph (it was returned by get_ready)
        self.build_sorter.done(node)

        # Check if any nodes are now ready to build because their install dependencies are satisfied
        # We need to mark nodes as done in the build sorter when all their install dependencies are built
        self._update_build_sorter_for_install_dependencies()

        logger.debug("marked %s as built", node.key)

    def _update_build_sorter_for_install_dependencies(self) -> None:
        """Mark nodes as done in build sorter when their install dependencies are satisfied."""
        # Get nodes that are ready in the build sorter but might be waiting for install deps
        ready_build_nodes = list(self.build_sorter.get_ready())
        
        for node in self.graph.get_all_nodes():
            if node.key == dependency_graph.ROOT or self.is_node_built(node):
                continue
                
            # Check if all install dependencies of this node are built
            install_deps_satisfied = True
            for edge in node.children:
                if edge.req_type.is_install_requirement:
                    if not self.is_node_built(edge.destination_node):
                        install_deps_satisfied = False
                        break
            
            # If all install deps are satisfied and node is ready in build sorter, 
            # we can mark it as available for building by marking it done if it's ready
            if install_deps_satisfied and node in ready_build_nodes:
                # This node can now be built - it's already ready in build sorter
                pass  # No action needed, it's already in ready_build_nodes

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Actually, let me step back and think about this more clearly. The original requirement was:

"Nodes become ready to build when the build dependency graph says they are. When nodes are built, they should be marked done in the install dependency graph and then all of the ready nodes of the install dependency graph should be marked as done in the build dependency graph."

But you're right - this creates a circular dependency between the two sorters that doesn't work with the TopologicalSorter API constraints.

Let me simplify this to use only the build dependency sorter and handle install dependencies through a different mechanism. Let me remove the install sorter entirely and implement a simpler approach:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorter for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        buildable_nodes = list(self.build_sorter.get_ready())

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update the build sorter."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the build dependency graph (it was returned by get_ready)
        self.build_sorter.done(node)

        # Check if any nodes are now ready to build because their install dependencies are satisfied
        # We need to mark nodes as done in the build sorter when all their install dependencies are built
        self._update_build_sorter_for_install_dependencies()

        logger.debug("marked %s as built", node.key)

    def _update_build_sorter_for_install_dependencies(self) -> None:
        """Mark nodes as done in build sorter when their install dependencies are satisfied."""
        # Get nodes that are ready in the build sorter but might be waiting for install deps
        ready_build_nodes = list(self.build_sorter.get_ready())
        
        for node in self.graph.get_all_nodes():
            if node.key == dependency_graph.ROOT or self.is_node_built(node):
                continue
                
            # Check if all install dependencies of this node are built
            install_deps_satisfied = True
            for edge in node.children:
                if edge.req_type.is_install_requirement:
                    if not self.is_node_built(edge.destination_node):
                        install_deps_satisfied = False
                        break
            
            # If all install deps are satisfied and node is ready in build sorter, 
            # we can mark it as available for building by marking it done if it's ready
            if install_deps_satisfied and node in ready_build_nodes:
                # This node can now be built - it's already ready in build sorter
                pass  # No action needed, it's already in ready_build_nodes

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now let me simplify the mark_node_built method to just use the build sorter and manually check install dependencies:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorter for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        buildable_nodes = list(self.build_sorter.get_ready())

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update the build sorter."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the build dependency graph (it was returned by get_ready)
        self.build_sorter.done(node)

        logger.debug("marked %s as built", node.key)

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now I need to update the get_nodes_ready_to_build method to filter out nodes that don't have their install dependencies satisfied yet:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorter for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        ready_from_build_sorter = list(self.build_sorter.get_ready())
        
        # Filter to only include nodes whose install dependencies are also satisfied
        buildable_nodes = []
        for node in ready_from_build_sorter:
            # Check if all install dependencies of this node are built
            install_deps_satisfied = True
            for edge in node.children:
                if edge.req_type.is_install_requirement:
                    if not self.is_node_built(edge.destination_node):
                        install_deps_satisfied = False
                        break
            
            if install_deps_satisfied:
                buildable_nodes.append(node)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and update the build sorter."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Mark the node as done in the build dependency graph (it was returned by get_ready)
        self.build_sorter.done(node)

        logger.debug("marked %s as built", node.key)

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now let me test this simplified approach:

User

Stop, no, that's wrong. mark_node_built() should remember that a node is built but should only mark it as done() when the installation dependencies of the node have also all been built. The method is still immediately calling done() on the build_sorter

Cursor

You're absolutely right! I'm still calling done() immediately in mark_node_built(). The correct logic should be:

Remember that a node is built (add to built_node_keys)
Only call done() on the build sorter when ALL of that node's install dependencies are also built
Check other nodes to see if they can now be marked as done because their install dependencies are satisfied

Let me fix this:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorter for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        ready_from_build_sorter = list(self.build_sorter.get_ready())
        
        # Filter to only include nodes whose install dependencies are also satisfied
        buildable_nodes = []
        for node in ready_from_build_sorter:
            # Check if all install dependencies of this node are built
            install_deps_satisfied = True
            for edge in node.children:
                if edge.req_type.is_install_requirement:
                    if not self.is_node_built(edge.destination_node):
                        install_deps_satisfied = False
                        break
            
            if install_deps_satisfied:
                buildable_nodes.append(node)

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and check if any nodes can now be marked done in build sorter."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Check if this node can be marked as done in the build sorter
        # (only if all its install dependencies are also built)
        self._check_and_mark_done_in_build_sorter(node)
        
        # Check if any other nodes can now be marked as done because this node was built
        self._update_build_sorter_done_status()

        logger.debug("marked %s as built", node.key)

    def _check_and_mark_done_in_build_sorter(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as done in build sorter if all its install dependencies are built."""
        # Check if all install dependencies of this node are built
        for edge in node.children:
            if edge.req_type.is_install_requirement:
                if not self.is_node_built(edge.destination_node):
                    # Not all install dependencies are built yet
                    return
        
        # All install dependencies are built, mark as done in build sorter
        try:
            self.build_sorter.done(node)
            logger.debug("marked %s as done in build sorter (install deps satisfied)", node.key)
        except ValueError:
            # Node was not ready or already done
            pass

    def _update_build_sorter_done_status(self) -> None:
        """Check all built nodes to see if any can now be marked done in build sorter."""
        for node in self.graph.get_all_nodes():
            if node.key == dependency_graph.ROOT:
                continue
            if self.is_node_built(node):
                self._check_and_mark_done_in_build_sorter(node)

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now I can also remove the install dependency filtering from get_nodes_ready_to_build since the build sorter will only return nodes that are truly ready (i.e., their install dependencies have been marked as done):

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorter for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        # These nodes have their build dependencies satisfied AND their install dependencies satisfied
        # (because we only mark nodes as done in the build sorter when install deps are satisfied)
        buildable_nodes = list(self.build_sorter.get_ready())

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built and check if any nodes can now be marked done in build sorter."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        # Check if this node can be marked as done in the build sorter
        # (only if all its install dependencies are also built)
        self._check_and_mark_done_in_build_sorter(node)
        
        # Check if any other nodes can now be marked as done because this node was built
        self._update_build_sorter_done_status()

        logger.debug("marked %s as built", node.key)

    def _check_and_mark_done_in_build_sorter(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as done in build sorter if all its install dependencies are built."""
        # Check if all install dependencies of this node are built
        for edge in node.children:
            if edge.req_type.is_install_requirement:
                if not self.is_node_built(edge.destination_node):
                    # Not all install dependencies are built yet
                    return
        
        # All install dependencies are built, mark as done in build sorter
        try:
            self.build_sorter.done(node)
            logger.debug("marked %s as done in build sorter (install deps satisfied)", node.key)
        except ValueError:
            # Node was not ready or already done
            pass

    def _update_build_sorter_done_status(self) -> None:
        """Check all built nodes to see if any can now be marked done in build sorter."""
        for node in self.graph.get_all_nodes():
            if node.key == dependency_graph.ROOT:
                continue
            if self.is_node_built(node):
                self._check_and_mark_done_in_build_sorter(node)

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Perfect! Now the logic is correct:

mark_node_built() remembers that a node is built but only calls done() on the build sorter when all install dependencies are satisfied
get_nodes_ready_to_build() simply gets nodes from the build sorter, which will only return nodes that are truly ready (both build and install dependencies satisfied)
The build sorter manages the proper ordering based on build dependencies, and we use the done() mechanism correctly by only calling it when install dependencies are also satisfied

Let me test this approach:

User

Instead of updating the sorter in mark_node_built, update it at the start of get_nodes_ready_to_build, before getting more ready nodes. This will be more efficient because it will iterate over the built nodes less often.

Cursor

Excellent point! That's much more efficient. Instead of updating the sorter every time a node is built, I'll update it once at the beginning of get_nodes_ready_to_build() before getting the ready nodes. This way we only iterate over built nodes when we actually need to check for ready nodes.

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorter for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Get nodes that are ready to build from the build dependency sorter
        # These nodes have their build dependencies satisfied AND their install dependencies satisfied
        # (because we only mark nodes as done in the build sorter when install deps are satisfied)
        buildable_nodes = list(self.build_sorter.get_ready())

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        logger.debug("marked %s as built", node.key)

    def _check_and_mark_done_in_build_sorter(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as done in build sorter if all its install dependencies are built."""
        # Check if all install dependencies of this node are built
        for edge in node.children:
            if edge.req_type.is_install_requirement:
                if not self.is_node_built(edge.destination_node):
                    # Not all install dependencies are built yet
                    return
        
        # All install dependencies are built, mark as done in build sorter
        try:
            self.build_sorter.done(node)
            logger.debug("marked %s as done in build sorter (install deps satisfied)", node.key)
        except ValueError:
            # Node was not ready or already done
            pass

    def _update_build_sorter_done_status(self) -> None:
        """Check all built nodes to see if any can now be marked done in build sorter."""
        for node in self.graph.get_all_nodes():
            if node.key == dependency_graph.ROOT:
                continue
            if self.is_node_built(node):
                self._check_and_mark_done_in_build_sorter(node)

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
    "-m",
    "--max-workers",
    type=int,
    default=None,
    help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
    wkctx: context.WorkContext,
    graph_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
    max_workers: int | None,
) -> None:
    """Build wheels in parallel based on a dependency graph

    GRAPH_FILE is a graph.json file containing the dependency relationships between packages

    Performs parallel builds of wheels based on their dependency relationships.
    Packages that have no dependencies or whose dependencies are already built
    can be built concurrently. By default, all possible packages are built in
    parallel. Use --max-workers to limit the number of concurrent builds.

    """
    wkctx.enable_parallel_builds()

    server.start_wheel_server(wkctx)
    wheel_server_urls: list[str] = [wkctx.wheel_server_url]
    if cache_wheel_server_url:
        # put after local server so we always check local server first
        wheel_server_urls.append(cache_wheel_server_url)

    if force:
        logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
    else:
        logger.info(
            f"skipping builds for versions of packages available at {wheel_server_urls}"
        )

    # Load the dependency graph
    logger.info("reading dependency graph from %s", graph_file)
    graph: dependency_graph.DependencyGraph
    graph = dependency_graph.DependencyGraph.from_file(graph_file)

    # Initialize the parallel build manager
    build_manager = ParallelBuildManager(wkctx, graph)

    # Get total count for progress tracking
    total_nodes = len(
        [n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
    )
    logger.info("found %d packages to build", total_nodes)

    # A node can be built when all of its build dependencies are built
    entries: list[BuildSequenceEntry] = []

    with progress.progress_context(total=total_nodes) as progressbar:

        def update_progressbar_cb(
            future: concurrent.futures.Future[BuildSequenceEntry],
        ) -> None:
            """Immediately update the progress when when a task is done"""
            progressbar.update()

        while build_manager.have_remaining_nodes():
            # Get batches of nodes that are ready to be built, organized by exclusivity
            for buildable_nodes in build_manager.get_nodes_ready_to_build():
                # Build up to max_workers nodes concurrently (or all if max_workers is None)
                with concurrent.futures.ThreadPoolExecutor(
                    max_workers=max_workers
                ) as executor:
                    futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
                    reqs: list[Requirement] = []
                    logger.info(
                        "starting to build batch: %s",
                        sorted(n.key for n in buildable_nodes),
                    )
                    for node in buildable_nodes:
                        req = Requirement(f"{node.canonicalized_name}=={node.version}")
                        reqs.append(req)
                        future = executor.submit(
                            _build_parallel,
                            wkctx=wkctx,
                            resolved_version=node.version,
                            req=req,
                            source_download_url=node.download_url,
                            force=force,
                            cache_wheel_server_url=cache_wheel_server_url,
                        )
                        future.add_done_callback(update_progressbar_cb)
                        futures.append(future)

                    # Wait for all builds to complete
                    for node, future in zip(buildable_nodes, futures, strict=True):
                        try:
                            entry = future.result()
                            entries.append(entry)
                            build_manager.mark_node_built(node)
                            # progress bar is updated in callback
                        except Exception as e:
                            logger.error(f"Failed to build {node.key}: {e}")
                            raise

    metrics.summarize(wkctx, "Building in parallel")
    _summary(wkctx, entries)


build_parallel._fromager_show_build_settings = True  # type: ignore

Now let me update get_nodes_ready_to_build() to call the update method at the beginning:

import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse

import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text

from fromager import (
    build_environment,
    clickext,
    context,
    dependency_graph,
    hooks,
    metrics,
    overrides,
    progress,
    read,
    server,
    sources,
    wheels,
)

from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context

logger = logging.getLogger(__name__)

DependencyNodeList = list[dependency_graph.DependencyNode]


@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
    # compare, hash, and sort by name and version
    name: str
    version: Version
    prebuilt: bool = dataclasses.field(compare=False)
    download_url: str = dataclasses.field(compare=False)
    wheel_filename: pathlib.Path = dataclasses.field(compare=False)
    skipped: bool = dataclasses.field(default=False, compare=False)

    @staticmethod
    def dict_factory(x):
        return {
            k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
        }


@click.command()
@click.option(
    "--wheel-server-url",
    default="",
    type=str,
    help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
    wkctx: context.WorkContext,
    wheel_server_url: str,
    dist_name: str,
    dist_version: Version,
    sdist_server_url: str,
) -> None:
    """Build a single version of a single wheel

    DIST_NAME is the name of a distribution

    DIST_VERSION is the version to process

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    1. Downloads the source distribution.

    2. Unpacks it and prepares the source via patching, vendoring rust
       dependencies, etc.

    3. Prepares a build environment with the build dependencies.

    4. Builds the wheel.

    Refer to the 'step' commands for scripting these stages
    separately.

    """
    wkctx.wheel_server_url = wheel_server_url
    server.start_wheel_server(wkctx)
    req = Requirement(f"{dist_name}=={dist_version}")
    with req_ctxvar_context(req, dist_version):
        # We have to resolve the source here to get a
        # source_url. Other build modes use data computed from a
        # bootstrap job where that URL is saved in the build
        # instruction file passed to build-sequence or build-parallel.
        source_url, version = sources.resolve_source(
            ctx=wkctx,
            req=req,
            sdist_server_url=sdist_server_url,
        )
        entry = _build(
            wkctx=wkctx,
            resolved_version=version,
            req=req,
            source_download_url=source_url,
            force=True,
            cache_wheel_server_url=None,
        )
    print(entry.wheel_filename)


build._fromager_show_build_settings = True  # type: ignore


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels even if they have already been built",
)
@click.option(
    "-c",
    "--cache-wheel-server-url",
    "cache_wheel_server_url",
    help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
    wkctx: context.WorkContext,
    build_order_file: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> None:
    """Build a sequence of wheels in order

    BUILD_ORDER_FILE is the build-order.json files to build

    SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists

    Performs the equivalent of the 'build' command for each item in
    the build order file.

    """
    server.start_wheel_server(wkctx)

    if force:
        logger.info(
            "rebuilding all wheels even if they exist in "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )
    else:
        logger.info(
            "skipping builds for versions of packages available at "
            f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
        )

    entries: list[BuildSequenceEntry] = []

    logger.info("reading build order from %s", build_order_file)
    with read.open_file_or_url(build_order_file) as f:
        for entry in progress.progress(json.load(f)):
            dist_name = entry["dist"]
            resolved_version = Version(entry["version"])
            source_download_url = entry["source_url"]

            # If we are building from git, use the requirement as specified so
            # we include the URL. Otherwise, create a fake requirement with the
            # name and version so we are explicitly building the expected
            # version.
            if entry["source_url_type"] == "git":
                req = Requirement(entry["req"])
            else:
                req = Requirement(f"{dist_name}=={resolved_version}")

            with req_ctxvar_context(req, resolved_version):
                logger.info("building %s", resolved_version)
                entry = _build(
                    wkctx=wkctx,
                    resolved_version=resolved_version,
                    req=req,
                    source_download_url=source_download_url,
                    force=force,
                    cache_wheel_server_url=cache_wheel_server_url,
                )
                if entry.prebuilt:
                    logger.info(
                        "downloaded prebuilt wheel %s", entry.wheel_filename.name
                    )
                elif entry.skipped:
                    logger.info(
                        "skipping building wheel since %s already exists",
                        entry.wheel_filename.name,
                    )
                else:
                    logger.info("built %s", entry.wheel_filename.name)

                entries.append(entry)

    metrics.summarize(wkctx, "Building")

    _summary(wkctx, entries)


build_sequence._fromager_show_build_settings = True  # type: ignore


def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
    output: list[typing.Any] = []
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    output.append(Text(f"Build sequence summary {now}\n"))

    built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
    if built_entries:
        output.append(
            _create_table(
                built_entries,
                title="New builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No new builds\n"))

    prebuilt_entries = [e for e in entries if e.prebuilt]
    if prebuilt_entries:
        output.append(
            _create_table(
                prebuilt_entries,
                title="Prebuilt wheels",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No pre-built wheels\n"))

    skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
    if skipped_entries:
        output.append(
            _create_table(
                skipped_entries,
                title="Skipped existing builds",
                box=rich.box.MARKDOWN,
                title_justify="left",
            )
        )
    else:
        output.append(Text("No skipped builds\n"))

    console = rich.get_console()
    console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
        console = rich.console.Console(file=f, width=sys.maxsize)
        console.print(*output, sep="\n\n")

    with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
        json.dump(
            [
                dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
                for e in entries
            ],
            f,
        )


def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
    table = Table(**table_kwargs)
    table.add_column("Name", justify="right", no_wrap=True)
    table.add_column("Version", no_wrap=True)
    table.add_column("Wheel", no_wrap=True)
    table.add_column("Source URL")

    platlib_count = 0

    for info in sorted(entries):
        tags = parse_wheel_filename(info.wheel_filename.name)[3]
        if any(t.platform != "any" or t.abi != "none" for t in tags):
            platlib_count += 1
        source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
        table.add_row(
            info.name,
            str(info.version),
            info.wheel_filename.name,
            # escape Rich markup
            rf"\[{source_filename}]({info.download_url})",
        )

    # summary
    table.add_section()
    table.add_row(
        f"total: {len(entries)}",
        None,
        f"platlib: {platlib_count}",
        None,
    )
    return table


def _build(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """Handle one version of one wheel.

    Either:
    1. Reuse an existing wheel we have locally.
    2. Download a pre-built wheel.
    3. Build the wheel from source.
    """
    wheel_filename: pathlib.Path | None = None
    use_exiting_wheel: bool = False

    # Set up a log file for all of the details of the build for this one wheel.
    # We attach a handler to the root logger so that all messages are logged to
    # the file, and we add a filter to the handler so that only messages from
    # the current thread are logged for when we build in parallel.
    root_logger = logging.getLogger(None)
    module_name = overrides.pkgname_to_override_module(req.name)
    wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
    file_handler = logging.FileHandler(filename=str(wheel_log))
    file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
    file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
    root_logger.addHandler(file_handler)

    logger.info("starting processing")
    pbi = wkctx.package_build_info(req)
    prebuilt = pbi.pre_built

    wheel_server_urls = wheels.get_wheel_server_urls(
        wkctx, req, cache_wheel_server_url=cache_wheel_server_url
    )

    # See if we can reuse an existing wheel.
    if not force:
        wheel_filename = _is_wheel_built(
            wkctx,
            req.name,
            resolved_version,
            wheel_server_urls,
        )
        if wheel_filename:
            logger.info("using existing wheel from %s", wheel_filename)
            use_exiting_wheel = True

    # Handle prebuilt wheels.
    if prebuilt:
        if not wheel_filename:
            logger.info("downloading prebuilt wheel")
            wheel_filename = wheels.download_wheel(
                req=req,
                wheel_url=source_download_url,
                output_directory=wkctx.wheels_build,
            )
        else:
            # already downloaded prebuilt wheel
            use_exiting_wheel = True

        # Run hooks for prebuilt wheels. At this point wheel_filename should
        # be set either from _is_wheel_built() or download_wheel().
        hooks.run_prebuilt_wheel_hooks(
            ctx=wkctx,
            req=req,
            dist_name=req.name,
            dist_version=str(resolved_version),
            wheel_filename=wheel_filename,
        )

    # If we get here and still don't have a wheel filename, then we need to
    # build the wheel.
    if not wheel_filename:
        source_filename = sources.download_source(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            download_url=source_download_url,
        )
        logger.debug(
            "saved sdist of version %s from %s to %s",
            resolved_version,
            source_download_url,
            source_filename,
        )

        # Prepare source
        source_root_dir = sources.prepare_source(
            ctx=wkctx,
            req=req,
            source_filename=source_filename,
            version=resolved_version,
        )

        # Build environment
        build_env = build_environment.prepare_build_environment(
            ctx=wkctx, req=req, sdist_root_dir=source_root_dir
        )

        # Make a new source distribution, in case we patched the code.
        sdist_filename = sources.build_sdist(
            ctx=wkctx,
            req=req,
            version=resolved_version,
            sdist_root_dir=source_root_dir,
            build_env=build_env,
        )

        # Build
        wheel_filename = wheels.build_wheel(
            ctx=wkctx,
            req=req,
            sdist_root_dir=source_root_dir,
            version=resolved_version,
            build_env=build_env,
        )

        hooks.run_post_build_hooks(
            ctx=wkctx,
            req=req,
            dist_name=canonicalize_name(req.name),
            dist_version=str(resolved_version),
            sdist_filename=sdist_filename,
            wheel_filename=wheel_filename,
        )

        wkctx.clean_build_dirs(source_root_dir, build_env)

    root_logger.removeHandler(file_handler)
    file_handler.close()

    server.update_wheel_mirror(wkctx)

    # After we update the wheel mirror, the built file has
    # moved to a new directory.
    wheel_filename = wkctx.wheels_downloads / wheel_filename.name

    return BuildSequenceEntry(
        name=canonicalize_name(req.name),
        version=resolved_version,
        prebuilt=prebuilt,
        download_url=source_download_url,
        wheel_filename=wheel_filename,
        skipped=use_exiting_wheel,
    )


def _is_wheel_built(
    wkctx: context.WorkContext,
    dist_name: str,
    resolved_version: Version,
    wheel_server_urls: list[str],
) -> pathlib.Path | None:
    req = Requirement(f"{dist_name}=={resolved_version}")

    try:
        logger.info(
            "checking if a suitable wheel for %s was already built on %s",
            req,
            wheel_server_urls,
        )
        url, _ = wheels.resolve_prebuilt_wheel(
            ctx=wkctx,
            req=req,
            wheel_server_urls=wheel_server_urls,
        )
        logger.info("found candidate wheel %s", url)
        pbi = wkctx.package_build_info(req)
        build_tag_from_settings = pbi.build_tag(resolved_version)
        build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
        wheel_basename = resolver.extract_filename_from_url(url)
        _, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
        existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
        if (
            existing_build_tag[0] > build_tag[0]
            and existing_build_tag[1] == build_tag[1]
        ):
            raise ValueError(
                f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
            )
        if existing_build_tag != build_tag:
            logger.info(
                f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
            )
            return None

        wheel_filename: pathlib.Path | None = None
        if url.startswith(wkctx.wheel_server_url):
            logging.debug("found wheel on local server")
            wheel_filename = wkctx.wheels_downloads / wheel_basename
            if not wheel_filename.exists():
                logger.info("wheel not found in local cache, preparing to download")
                wheel_filename = None

        if not wheel_filename:
            # if the found wheel was on an external server, then download it
            logger.info("downloading wheel from %s", url)
            wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)

        return wheel_filename
    except Exception:
        logger.debug(
            "could not locate prebuilt wheel %s-%s on %s",
            dist_name,
            resolved_version,
            wheel_server_urls,
            exc_info=True,
        )
        logger.info("could not locate prebuilt wheel")
        return None


def _build_parallel(
    wkctx: context.WorkContext,
    resolved_version: Version,
    req: Requirement,
    source_download_url: str,
    force: bool,
    cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
    """
    This function runs in a thread to manage the build of a single package.
    """
    with req_ctxvar_context(req, resolved_version):
        return _build(
            wkctx=wkctx,
            resolved_version=resolved_version,
            req=req,
            source_download_url=source_download_url,
            force=force,
            cache_wheel_server_url=cache_wheel_server_url,
        )


class ParallelBuildManager:
    """Manages the logic for determining which nodes can be built in parallel."""

    def __init__(
        self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
    ):
        self.wkctx = wkctx
        self.graph = graph
        self.built_node_keys: set[str] = set()
        # Cache remaining nodes for efficiency - initialize with all non-root nodes
        self._remaining_nodes: DependencyNodeList = [
            node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
        ]

        # Initialize topological sorter for managing build order
        self.build_sorter: graphlib.TopologicalSorter[
            dependency_graph.DependencyNode
        ] = graph.get_build_dependency_topological_sorter()

    def get_nodes_ready_to_build(
        self,
    ) -> collections.abc.Generator[DependencyNodeList, None, None]:
        """Generator that yields lists of nodes ready to be built, organized by exclusivity.

        Each exclusive build node is yielded in its own list.
        All non-exclusive nodes are yielded together in a single list.
        """
        # Update the build sorter to mark nodes as done when their install dependencies are satisfied
        self._update_build_sorter_done_status()
        
        # Get nodes that are ready to build from the build dependency sorter
        # These nodes have their build dependencies satisfied AND their install dependencies satisfied
        # (because we only mark nodes as done in the build sorter when install deps are satisfied)
        buildable_nodes = list(self.build_sorter.get_ready())

        if not buildable_nodes:
            # If we can't build anything but still have nodes, we have a cycle
            remaining: list[str] = [n.key for n in self._remaining_nodes]
            logger.info("have already built: %s", sorted(self.built_node_keys))
            raise ValueError(f"Circular dependency detected among: {remaining}")

        logger.info(
            "ready to build: %s",
            sorted(n.key for n in buildable_nodes),
        )

        # Process nodes and yield exclusive ones immediately
        non_exclusive_nodes: DependencyNodeList = []

        for node in buildable_nodes:
            if self.wkctx.settings.package_build_info(
                node.canonicalized_name
            ).exclusive_build:
                logger.info(f"{node.canonicalized_name}: requires exclusive build")
                yield [node]
            else:
                non_exclusive_nodes.append(node)

        # Yield all non-exclusive nodes together if any exist
        if non_exclusive_nodes:
            logger.info(
                "ready to build: %s",
                sorted(n.key for n in non_exclusive_nodes),
            )
            yield non_exclusive_nodes

    def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as built."""
        self.built_node_keys.add(node.key)
        # Remove from remaining nodes cache for efficiency
        if node in self._remaining_nodes:
            self._remaining_nodes.remove(node)

        logger.debug("marked %s as built", node.key)

    def _check_and_mark_done_in_build_sorter(self, node: dependency_graph.DependencyNode) -> None:
        """Mark a node as done in build sorter if all its install dependencies are built."""
        # Check if all install dependencies of this node are built
        for edge in node.children:
            if edge.req_type.is_install_requirement:
                if not self.is_node_built(edge.destination_node):
                    # Not all install dependencies are built yet
                    return
        
        # All install dependencies are built, mark as done in build sorter
        try:
            self.build_sorter.done(node)
            logger.debug("marked %s as done in build sorter (install deps satisfied)", node.key)
        except ValueError:
            # Node was not ready or already done
            pass

    def _update_build_sorter_done_status(self) -> None:
        """Check all built nodes to see if any can now be marked done in build sorter."""
        for node in self.graph.get_all_nodes():
            if node.key == dependency_graph.ROOT:
                continue
            if self.is_node_built(node):
                self._check_and_mark_done_in_build_sorter(node)

    def have_remaining_nodes(self) -> bool:
        """Check if there are any nodes left to build."""
        return bool(self._remaining_nodes)

    def get_built_nodes(self) -> DependencyNodeList:
        """Get all nodes that have been built."""
        return [
            node
            for node in self.graph.nodes.values()
            if node.key in self.built_node_keys
        ]

    def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
        """Check if a specific node has been built."""
        return node.key in self.built_node_keys


@click.command()
@click.option(
    "-f",
    "--force",
    is_flag=True,
    default=False,
    help="rebuild wheels e

dhellmann/cursor_add_topological_sorter_method.md

Add topological sorter method

Refactoring Benefits

Implementation Details

Key Changes

Benefits

Key Changes Made

1. Added Topological Sorters to Constructor

2. Simplified Build Logic

3. Updated Node Completion Logic

How It Works

Benefits

Key Changes Made

1. Changed Method to Generator

2. Organized Nodes by Exclusivity

3. Removed Old Logic

4. Updated Calling Code

How It Works

Benefits

Changes Made

1. Removed Redundant Method

2. Removed Redundant Filtering

3. Simplified Logic

Why This Works Better

Changes Made

1. Single Pass Processing

2. Eliminated Redundant Collections

3. Immediate Yielding

Benefits

Linter Results ✅

MyPy Results ✅

Summary