Exported on 9/28/2025 at 11:52:08 EDT from Cursor (1.7.11)
User
Add a method to DependencyGraph to return a graphlib TopologicalSorter for the graph based on the build dependencies of the nodes.
Cursor
I'll help you add a method to the DependencyGraph
class that returns a graphlib.TopologicalSorter
based on the build dependencies. Let me first examine the current DependencyGraph
class to understand its structure and existing methods.
Now I need to understand what RequirementType
looks like to see what build dependencies are called, and also check if graphlib
is already imported or if I need to add it.
Let me check the RequirementType
enum to understand what build dependencies are called:
Perfect! Now I can see that there are build-related requirement types: BUILD_SYSTEM
, BUILD_BACKEND
, and BUILD_SDIST
. There's also a convenient property is_build_requirement
that identifies all build requirements.
Now I'll create a todo list to track this task and then implement the method:
Now I'll implement the method. First, I need to add the graphlib
import and then create the method:
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
Now I'll add the method to the DependencyGraph
class. I'll add it near the end of the class, after the existing methods:
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def get_build_dependency_topological_sorter(self) -> graphlib.TopologicalSorter[str]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains nodes as package keys (name==version) and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a package key and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()
# Get all build requirement types
build_req_types = [
RequirementType.BUILD_SYSTEM,
RequirementType.BUILD_BACKEND,
RequirementType.BUILD_SDIST,
]
# Collect all nodes that have build dependencies or are build dependencies
build_nodes: set[str] = set()
build_edges: list[tuple[str, str]] = []
# Traverse the graph to find build dependencies
for edge in self.get_dependency_edges(match_dep_types=build_req_types):
parent_key = None
# Find the parent node for this edge
for node in self.get_all_nodes():
if edge in node.children:
parent_key = node.key
break
if parent_key is not None:
build_nodes.add(parent_key)
build_nodes.add(edge.destination_node.key)
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
build_edges.append((parent_key, edge.destination_node.key))
# Add all build nodes to the sorter
for node_key in build_nodes:
sorter.add(node_key)
# Add dependencies to the sorter
for dependent, dependency in build_edges:
# Skip self-dependencies to avoid cycles
if dependent != dependency:
sorter.add(dependent, dependency)
return sorter
Now let me check for any linting errors in the file:
I need to fix the whitespace and trailing space issues. Let me clean those up:
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def get_build_dependency_topological_sorter(self) -> graphlib.TopologicalSorter[str]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains nodes as package keys (name==version) and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a package key and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()
# Get all build requirement types
build_req_types = [
RequirementType.BUILD_SYSTEM,
RequirementType.BUILD_BACKEND,
RequirementType.BUILD_SDIST,
]
# Collect all nodes that have build dependencies or are build dependencies
build_nodes: set[str] = set()
build_edges: list[tuple[str, str]] = []
# Traverse the graph to find build dependencies
for edge in self.get_dependency_edges(match_dep_types=build_req_types):
parent_key = None
# Find the parent node for this edge
for node in self.get_all_nodes():
if edge in node.children:
parent_key = node.key
break
if parent_key is not None:
build_nodes.add(parent_key)
build_nodes.add(edge.destination_node.key)
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
build_edges.append((parent_key, edge.destination_node.key))
# Add all build nodes to the sorter
for node_key in build_nodes:
sorter.add(node_key)
# Add dependencies to the sorter
for dependent, dependency in build_edges:
# Skip self-dependencies to avoid cycles
if dependent != dependency:
sorter.add(dependent, dependency)
return sorter
Let me check the lints again to see if I've resolved the whitespace issues:
Good! The whitespace issues are resolved. The remaining linting errors are pre-existing issues in the file that are not related to my changes. Let me run the tests to make sure my implementation works correctly:
User
You don't need to traverse the graph to build the new one. Just iterate over all of the nodes and look at their outgoing edges.
Cursor
You're absolutely right! That's a much more efficient approach. Let me refactor the method to simply iterate over all nodes and examine their outgoing edges instead of traversing the graph.
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def get_build_dependency_topological_sorter(self) -> graphlib.TopologicalSorter[str]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains nodes as package keys (name==version) and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a package key and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()
# Collect all nodes that have build dependencies or are build dependencies
build_nodes: set[str] = set()
# Iterate over all nodes and examine their outgoing build dependency edges
for node in self.get_all_nodes():
for edge in node.children:
if edge.req_type.is_build_requirement:
# This node has a build dependency
build_nodes.add(node.key)
build_nodes.add(edge.destination_node.key)
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node.key != edge.destination_node.key:
sorter.add(node.key, edge.destination_node.key)
# Add any isolated build nodes (nodes with no dependencies)
for node_key in build_nodes:
sorter.add(node_key)
return sorter
Perfect! I've refactored the method to be much more efficient. The new implementation:
- Directly iterates over all nodes instead of traversing the graph
- Examines outgoing edges from each node to find build dependencies
- Uses the convenient
is_build_requirement
property fromRequirementType
to identify build dependencies - Builds the topological sorter more efficiently by adding dependencies as we find them
The key improvements:
- O(n) complexity instead of potentially more expensive graph traversal
- Simpler logic - just iterate through nodes and check their children
- Leverages existing properties like
is_build_requirement
- Cleaner code with fewer intermediate data structures
The method now returns a graphlib.TopologicalSorter[str]
that contains all nodes involved in build dependencies, with the proper dependency relationships established for topological sorting based on build requirements (build-system, build-backend, build-sdist).
User
Add a similar method to return a sorter using installation dependencies.
Cursor
I'll add a similar method for installation dependencies. Let me add it right after the build dependency method:
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def get_build_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[str]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains nodes as package keys (name==version) and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a package key and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()
# Collect all nodes that have build dependencies or are build dependencies
build_nodes: set[str] = set()
# Iterate over all nodes and examine their outgoing build dependency edges
for node in self.get_all_nodes():
for edge in node.children:
if edge.req_type.is_build_requirement:
# This node has a build dependency
build_nodes.add(node.key)
build_nodes.add(edge.destination_node.key)
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node.key != edge.destination_node.key:
sorter.add(node.key, edge.destination_node.key)
# Add any isolated build nodes (nodes with no dependencies)
for node_key in build_nodes:
sorter.add(node_key)
return sorter
def get_install_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[str]:
"""Return a TopologicalSorter for the install dependencies in the graph.
The sorter contains nodes as package keys (name==version) and edges
representing install-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a package key and edges
represent install dependencies (install, toplevel).
"""
sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()
# Collect all nodes that have install dependencies or are install dependencies
install_nodes: set[str] = set()
# Iterate over all nodes and examine their outgoing install dependency edges
for node in self.get_all_nodes():
for edge in node.children:
if edge.req_type.is_install_requirement:
# This node has an install dependency
install_nodes.add(node.key)
install_nodes.add(edge.destination_node.key)
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node.key != edge.destination_node.key:
sorter.add(node.key, edge.destination_node.key)
# Add any isolated install nodes (nodes with no dependencies)
for node_key in install_nodes:
sorter.add(node_key)
return sorter
Perfect! I've added the get_install_dependency_topological_sorter
method that follows the same pattern as the build dependency method. This new method:
- Uses
is_install_requirement
to identify install dependencies (install and toplevel requirement types) - Follows the same efficient approach of iterating over all nodes and examining their outgoing edges
- Returns a
graphlib.TopologicalSorter[str]
with proper dependency relationships for install-time dependencies - Handles cycles by skipping self-dependencies
- Includes isolated nodes that might not have dependencies but are part of the install dependency graph
Now the DependencyGraph
class has both methods:
get_build_dependency_topological_sorter()
- for build-time dependencies (build-system, build-backend, build-sdist)get_install_dependency_topological_sorter()
- for install-time dependencies (install, toplevel)
Both methods use the same efficient O(n) approach and return topological sorters that can be used to determine the proper order for processing packages based on their respective dependency types.
User
Instead of using str for the type in the graph, use DependencyNode
Cursor
I'll update both methods to use DependencyNode
instead of str
for the TopologicalSorter type. This makes more sense since we're working with the actual node objects.
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def get_build_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
sorter: graphlib.TopologicalSorter[DependencyNode] = graphlib.TopologicalSorter()
# Collect all nodes that have build dependencies or are build dependencies
build_nodes: set[DependencyNode] = set()
# Iterate over all nodes and examine their outgoing build dependency edges
for node in self.get_all_nodes():
for edge in node.children:
if edge.req_type.is_build_requirement:
# This node has a build dependency
build_nodes.add(node)
build_nodes.add(edge.destination_node)
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node != edge.destination_node:
sorter.add(node, edge.destination_node)
# Add any isolated build nodes (nodes with no dependencies)
for node in build_nodes:
sorter.add(node)
return sorter
def get_install_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[str]:
"""Return a TopologicalSorter for the install dependencies in the graph.
The sorter contains nodes as package keys (name==version) and edges
representing install-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a package key and edges
represent install dependencies (install, toplevel).
"""
sorter: graphlib.TopologicalSorter[str] = graphlib.TopologicalSorter()
# Collect all nodes that have install dependencies or are install dependencies
install_nodes: set[str] = set()
# Iterate over all nodes and examine their outgoing install dependency edges
for node in self.get_all_nodes():
for edge in node.children:
if edge.req_type.is_install_requirement:
# This node has an install dependency
install_nodes.add(node.key)
install_nodes.add(edge.destination_node.key)
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node.key != edge.destination_node.key:
sorter.add(node.key, edge.destination_node.key)
# Add any isolated install nodes (nodes with no dependencies)
for node_key in install_nodes:
sorter.add(node_key)
return sorter
Now let me update the install dependency method as well:
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def get_build_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
sorter: graphlib.TopologicalSorter[DependencyNode] = graphlib.TopologicalSorter()
# Collect all nodes that have build dependencies or are build dependencies
build_nodes: set[DependencyNode] = set()
# Iterate over all nodes and examine their outgoing build dependency edges
for node in self.get_all_nodes():
for edge in node.children:
if edge.req_type.is_build_requirement:
# This node has a build dependency
build_nodes.add(node)
build_nodes.add(edge.destination_node)
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node != edge.destination_node:
sorter.add(node, edge.destination_node)
# Add any isolated build nodes (nodes with no dependencies)
for node in build_nodes:
sorter.add(node)
return sorter
def get_install_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the install dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing install-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent install dependencies (install, toplevel).
"""
sorter: graphlib.TopologicalSorter[DependencyNode] = graphlib.TopologicalSorter()
# Collect all nodes that have install dependencies or are install dependencies
install_nodes: set[DependencyNode] = set()
# Iterate over all nodes and examine their outgoing install dependency edges
for node in self.get_all_nodes():
for edge in node.children:
if edge.req_type.is_install_requirement:
# This node has an install dependency
install_nodes.add(node)
install_nodes.add(edge.destination_node)
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node != edge.destination_node:
sorter.add(node, edge.destination_node)
# Add any isolated install nodes (nodes with no dependencies)
for node in install_nodes:
sorter.add(node)
return sorter
Perfect! I've updated both methods to use DependencyNode
instead of str
for the TopologicalSorter type. The key changes:
- Return type: Changed from
graphlib.TopologicalSorter[str]
tographlib.TopologicalSorter[DependencyNode]
- Internal collections: Changed from
set[str]
toset[DependencyNode]
- Sorter operations: Now using the actual
DependencyNode
objects instead of their string keys - Comparisons: Using direct node object comparison (
node != edge.destination_node
) instead of key comparison
This is much more type-safe and semantically correct since:
- The TopologicalSorter now works directly with the rich
DependencyNode
objects - Consumers can access all the node properties (version, canonicalized_name, download_url, etc.) directly
- No need to look up nodes by key after getting the sorted result
- Better encapsulation since the internal structure (keys) is not exposed
Both methods now return graphlib.TopologicalSorter[DependencyNode]
objects that can be used to get topologically sorted dependency nodes for build and install dependencies respectively.
User
Refactor both methods to share common logic.
Cursor
I'll refactor both methods to share common logic by creating a private helper method that takes a predicate function to determine which requirement types to include.
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def _get_dependency_topological_sorter(
self,
requirement_filter: typing.Callable[[RequirementType], bool],
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for dependencies matching the filter.
Args:
requirement_filter: Function that returns True for requirement types to include
Returns:
A TopologicalSorter with DependencyNode objects and dependency relationships
"""
sorter: graphlib.TopologicalSorter[DependencyNode] = (
graphlib.TopologicalSorter()
)
# Collect all nodes that have matching dependencies or are matching dependencies
dependency_nodes: set[DependencyNode] = set()
# Iterate over all nodes and examine their outgoing dependency edges
for node in self.get_all_nodes():
for edge in node.children:
if requirement_filter(edge.req_type):
# This node has a matching dependency
dependency_nodes.add(node)
dependency_nodes.add(edge.destination_node)
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node != edge.destination_node:
sorter.add(node, edge.destination_node)
# Add any isolated nodes (nodes with no dependencies)
for node in dependency_nodes:
sorter.add(node)
return sorter
def get_build_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
return self._get_dependency_topological_sorter(
lambda req_type: req_type.is_build_requirement
)
def get_install_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the install dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing install-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent install dependencies (install, toplevel).
"""
return self._get_dependency_topological_sorter(
lambda req_type: req_type.is_install_requirement
)
Perfect! I've successfully refactored both methods to share common logic. Here's what I did:
-
Eliminated Code Duplication: The core logic for building a topological sorter is now in a single private method
_get_dependency_topological_sorter
-
Flexible Design: The private method takes a
requirement_filter
function that determines which requirement types to include, making it reusable for any type of dependency filtering -
Cleaner Public Interface: Both public methods are now simple one-liners that call the shared implementation with the appropriate filter
-
Maintainability: Any future changes to the topological sorting logic only need to be made in one place
_get_dependency_topological_sorter
: Private helper method that contains all the shared logicrequirement_filter
: A callable that takes aRequirementType
and returnsbool
to determine inclusion- Build dependencies: Uses
lambda req_type: req_type.is_build_requirement
- Install dependencies: Uses
lambda req_type: req_type.is_install_requirement
This refactoring makes the code more maintainable and follows the DRY (Don't Repeat Yourself) principle while preserving the exact same functionality. If you need to add more dependency type filters in the future, you can easily do so by calling the private method with different filter functions.
User
The sorter should always include all nodes in the graph
Cursor
You're right! The topological sorter should include all nodes in the graph, not just those that have dependencies of the specified type. Let me fix the implementation:
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def _get_dependency_topological_sorter(
self,
requirement_filter: typing.Callable[[RequirementType], bool],
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for dependencies matching the filter.
Args:
requirement_filter: Function that returns True for requirement types to include
Returns:
A TopologicalSorter with all DependencyNode objects and filtered dependency relationships
"""
sorter: graphlib.TopologicalSorter[DependencyNode] = (
graphlib.TopologicalSorter()
)
# Add all nodes to the sorter first
for node in self.get_all_nodes():
sorter.add(node)
# Then add dependency relationships that match the filter
for node in self.get_all_nodes():
for edge in node.children:
if requirement_filter(edge.req_type):
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node != edge.destination_node:
sorter.add(node, edge.destination_node)
return sorter
def get_build_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
return self._get_dependency_topological_sorter(
lambda req_type: req_type.is_build_requirement
)
def get_install_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the install dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing install-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent install dependencies (install, toplevel).
"""
return self._get_dependency_topological_sorter(
lambda req_type: req_type.is_install_requirement
)
Perfect! Now the implementation correctly includes all nodes in the graph. Here's what I changed:
-
All nodes included: The sorter now includes every node in the dependency graph, not just those involved in the filtered dependency type
-
Two-phase approach:
- Phase 1: Add all nodes to the sorter without dependencies
- Phase 2: Add only the dependency relationships that match the filter
-
Cleaner logic: Removed the
dependency_nodes
set since we're now including all nodes by default
- Complete graph representation: The topological sorter contains the full graph structure
- Filtered relationships: Only the relevant dependency relationships (build or install) are used for ordering
- Isolated nodes handled: Nodes without dependencies of the filtered type are still included in the sorter
- Consistent behavior: Both build and install dependency sorters will contain all nodes, just with different dependency relationships
This means that when you call get_build_dependency_topological_sorter()
, you get all nodes in the graph but only build dependencies affect the ordering. Similarly, get_install_dependency_topological_sorter()
includes all nodes but only install dependencies affect the ordering.
User
Combine the two loops
Cursor
You're right! I can combine the two loops into a single iteration. Let me refactor that:
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def _get_dependency_topological_sorter(
self,
requirement_filter: typing.Callable[[RequirementType], bool],
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for dependencies matching the filter.
Args:
requirement_filter: Function that returns True for requirement types to include
Returns:
A TopologicalSorter with all DependencyNode objects and filtered dependency relationships
"""
sorter: graphlib.TopologicalSorter[DependencyNode] = (
graphlib.TopologicalSorter()
)
# Add all nodes and filtered dependency relationships
for node in self.get_all_nodes():
sorter.add(node)
for edge in node.children:
if requirement_filter(edge.req_type):
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node != edge.destination_node:
sorter.add(node, edge.destination_node)
return sorter
def get_build_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
return self._get_dependency_topological_sorter(
lambda req_type: req_type.is_build_requirement
)
def get_install_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the install dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing install-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent install dependencies (install, toplevel).
"""
return self._get_dependency_topological_sorter(
lambda req_type: req_type.is_install_requirement
)
Perfect! I've combined the two loops into a single iteration. Now the method:
- Iterates through all nodes once
- Adds each node to the sorter (ensuring all nodes are included)
- Examines the node's outgoing edges in the same iteration
- Adds filtered dependency relationships when they match the requirement filter
This is more efficient as it reduces the number of iterations over the nodes from 2 to 1, while maintaining the same functionality of including all nodes and only the relevant dependency relationships.
User
Call prepare() on the graph before returning it
Cursor
I'll add a call to prepare()
on the sorter before returning it. This prepares the topological sorter for iteration:
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def _get_dependency_topological_sorter(
self,
requirement_filter: typing.Callable[[RequirementType], bool],
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for dependencies matching the filter.
Args:
requirement_filter: Function that returns True for requirement types to include
Returns:
A TopologicalSorter with all DependencyNode objects and filtered dependency relationships
"""
sorter: graphlib.TopologicalSorter[DependencyNode] = (
graphlib.TopologicalSorter()
)
# Add all nodes and filtered dependency relationships
for node in self.get_all_nodes():
sorter.add(node)
for edge in node.children:
if requirement_filter(edge.req_type):
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles
if node != edge.destination_node:
sorter.add(node, edge.destination_node)
sorter.prepare()
return sorter
def get_build_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
return self._get_dependency_topological_sorter(
lambda req_type: req_type.is_build_requirement
)
def get_install_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the install dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing install-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent install dependencies (install, toplevel).
"""
return self._get_dependency_topological_sorter(
lambda req_type: req_type.is_install_requirement
)
Perfect! I've added the sorter.prepare()
call before returning the sorter. This is important because:
-
Prepares for iteration: The
prepare()
method readies the TopologicalSorter for use with methods likeget_ready()
anddone()
-
Validates the graph: It checks for cycles and raises
CycleError
if any are found -
Optimizes performance: It sets up internal data structures for efficient topological sorting operations
-
Required for stateful operations: After calling
prepare()
, you can use the sorter's stateful methods to get nodes that are ready to be processed (have no remaining dependencies)
Now both get_build_dependency_topological_sorter()
and get_install_dependency_topological_sorter()
return fully prepared TopologicalSorter objects that are ready to be used for iterating through the dependency graph in topological order.
User
Update the ParallelBuildManager to use 2 topological sorters for the graph to manage the build order. Nodes become ready to build when the build dependency graph says they are. When nodes are built, they should be marked done in the install dependency graph and then all of the ready nodes of the install dependency graph should be marked as done in the build dependency graph.
Cursor
I need to examine the current ParallelBuildManager implementation to understand how to integrate the two topological sorters. Let me first look at the build command and then the ParallelBuildManager.
Now let me look at the complete ParallelBuildManager class to understand its current methods:
Now I need to create a todo list and then update the ParallelBuildManager to use the two topological sorters:
Now let me update the ParallelBuildManager to use the two topological sorters. First, I'll need to import graphlib and update the constructor:
from __future__ import annotations
import concurrent.futures
import graphlib
import logging
import pathlib
import threading
import typing
import click
from packaging.requirements import Requirement
from packaging.version import Version
from .. import context, dependency_graph, metrics, server, wheels
from ..build_environment import req_ctxvar_context
Let me check the current imports in the build.py file:
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built (all build dependencies and their install dependencies are built)."""
buildable_nodes: DependencyNodeList = []
for node in nodes_to_build:
with req_ctxvar_context(Requirement(node.canonicalized_name), node.version):
# Get all build dependencies (build-system, build-backend, build-sdist)
build_deps: DependencyNodeList = [
edge.destination_node
for edge in node.children
if edge.req_type.is_build_requirement
]
# Collect all dependencies that must be built before this node can be built
all_required_deps: set[str] = set()
# Add direct build dependencies
for build_dep in build_deps:
all_required_deps.add(build_dep.key)
# Add installation dependencies of each build dependency
install_deps_of_build_dep = [
edge.destination_node
for edge in build_dep.children
if edge.req_type.is_install_requirement
]
for install_dep in install_deps_of_build_dep:
all_required_deps.add(install_dep.key)
# A node can be built when all required dependencies are built
unbuilt_deps: set[str] = set(
dep_key
for dep_key in all_required_deps
if dep_key not in self.built_node_keys
)
if not unbuilt_deps:
logger.info(
"ready to build, have all build dependencies and their install dependencies: %s",
sorted(all_required_deps),
)
buildable_nodes.append(node)
else:
logger.info(
"waiting for build dependencies and their install dependencies: %s",
sorted(unbuilt_deps),
)
return buildable_nodes
def _filter_for_exclusive_builds(
self, buildable_nodes: DependencyNodeList
) -> DependencyNodeList:
"""Filter buildable nodes to handle exclusive build requirements."""
# Check if any buildable node requires exclusive build (exclusive_build == True)
exclusive_nodes: DependencyNodeList = [
node
for node in buildable_nodes
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build
]
if exclusive_nodes:
# Only build the first exclusive node this round
filtered_nodes = [exclusive_nodes[0]]
logger.info(
f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
)
return filtered_nodes
return buildable_nodes
def get_nodes_ready_to_build(self) -> DependencyNodeList:
"""Get the list of nodes that are ready to be built in this round."""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Handle exclusive builds
buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)
return buildable_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get nodes that are ready to be built in this round
buildable_nodes = build_manager.get_nodes_ready_to_build()
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now let me update the ParallelBuildManager constructor to use the two topological sorters:
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_build_dependency_topological_sorter()
)
self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_install_dependency_topological_sorter()
)
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built (all build dependencies and their install dependencies are built)."""
buildable_nodes: DependencyNodeList = []
for node in nodes_to_build:
with req_ctxvar_context(Requirement(node.canonicalized_name), node.version):
# Get all build dependencies (build-system, build-backend, build-sdist)
build_deps: DependencyNodeList = [
edge.destination_node
for edge in node.children
if edge.req_type.is_build_requirement
]
# Collect all dependencies that must be built before this node can be built
all_required_deps: set[str] = set()
# Add direct build dependencies
for build_dep in build_deps:
all_required_deps.add(build_dep.key)
# Add installation dependencies of each build dependency
install_deps_of_build_dep = [
edge.destination_node
for edge in build_dep.children
if edge.req_type.is_install_requirement
]
for install_dep in install_deps_of_build_dep:
all_required_deps.add(install_dep.key)
# A node can be built when all required dependencies are built
unbuilt_deps: set[str] = set(
dep_key
for dep_key in all_required_deps
if dep_key not in self.built_node_keys
)
if not unbuilt_deps:
logger.info(
"ready to build, have all build dependencies and their install dependencies: %s",
sorted(all_required_deps),
)
buildable_nodes.append(node)
else:
logger.info(
"waiting for build dependencies and their install dependencies: %s",
sorted(unbuilt_deps),
)
return buildable_nodes
def _filter_for_exclusive_builds(
self, buildable_nodes: DependencyNodeList
) -> DependencyNodeList:
"""Filter buildable nodes to handle exclusive build requirements."""
# Check if any buildable node requires exclusive build (exclusive_build == True)
exclusive_nodes: DependencyNodeList = [
node
for node in buildable_nodes
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build
]
if exclusive_nodes:
# Only build the first exclusive node this round
filtered_nodes = [exclusive_nodes[0]]
logger.info(
f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
)
return filtered_nodes
return buildable_nodes
def get_nodes_ready_to_build(self) -> DependencyNodeList:
"""Get the list of nodes that are ready to be built in this round."""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Handle exclusive builds
buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)
return buildable_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get nodes that are ready to be built in this round
buildable_nodes = build_manager.get_nodes_ready_to_build()
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now I need to replace the _find_buildable_nodes
method to use the build dependency sorter instead of the current complex logic:
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_build_dependency_topological_sorter()
)
self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_install_dependency_topological_sorter()
)
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built using the build dependency topological sorter."""
# Get nodes that are ready to build from the build dependency sorter
ready_nodes = list(self.build_sorter.get_ready())
# Filter to only include nodes that are in our nodes_to_build list and not already built
buildable_nodes: DependencyNodeList = [
node for node in ready_nodes
if node in nodes_to_build and not self.is_node_built(node)
]
if buildable_nodes:
logger.info(
"ready to build from build dependency sorter: %s",
sorted(n.key for n in buildable_nodes),
)
return buildable_nodes
def _filter_for_exclusive_builds(
self, buildable_nodes: DependencyNodeList
) -> DependencyNodeList:
"""Filter buildable nodes to handle exclusive build requirements."""
# Check if any buildable node requires exclusive build (exclusive_build == True)
exclusive_nodes: DependencyNodeList = [
node
for node in buildable_nodes
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build
]
if exclusive_nodes:
# Only build the first exclusive node this round
filtered_nodes = [exclusive_nodes[0]]
logger.info(
f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
)
return filtered_nodes
return buildable_nodes
def get_nodes_ready_to_build(self) -> DependencyNodeList:
"""Get the list of nodes that are ready to be built in this round."""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Handle exclusive builds
buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)
return buildable_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get nodes that are ready to be built in this round
buildable_nodes = build_manager.get_nodes_ready_to_build()
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now I need to update the mark_node_built
method to handle both sorters according to the requirements: when nodes are built, they should be marked done in the install dependency graph, and then all ready nodes from the install dependency graph should be marked as done in the build dependency graph:
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_build_dependency_topological_sorter()
)
self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_install_dependency_topological_sorter()
)
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built using the build dependency topological sorter."""
# Get nodes that are ready to build from the build dependency sorter
ready_nodes = list(self.build_sorter.get_ready())
# Filter to only include nodes that are in our nodes_to_build list and not already built
buildable_nodes: DependencyNodeList = [
node for node in ready_nodes
if node in nodes_to_build and not self.is_node_built(node)
]
if buildable_nodes:
logger.info(
"ready to build from build dependency sorter: %s",
sorted(n.key for n in buildable_nodes),
)
return buildable_nodes
def _filter_for_exclusive_builds(
self, buildable_nodes: DependencyNodeList
) -> DependencyNodeList:
"""Filter buildable nodes to handle exclusive build requirements."""
# Check if any buildable node requires exclusive build (exclusive_build == True)
exclusive_nodes: DependencyNodeList = [
node
for node in buildable_nodes
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build
]
if exclusive_nodes:
# Only build the first exclusive node this round
filtered_nodes = [exclusive_nodes[0]]
logger.info(
f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
)
return filtered_nodes
return buildable_nodes
def get_nodes_ready_to_build(self) -> DependencyNodeList:
"""Get the list of nodes that are ready to be built in this round."""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Handle exclusive builds
buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)
return buildable_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes) if ready_install_nodes else "none"
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get nodes that are ready to be built in this round
buildable_nodes = build_manager.get_nodes_ready_to_build()
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Let me check for any linting errors in the updated file:
I need to fix the whitespace issues I introduced:
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_build_dependency_topological_sorter()
)
self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_install_dependency_topological_sorter()
)
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built using the build dependency topological sorter."""
# Get nodes that are ready to build from the build dependency sorter
ready_nodes = list(self.build_sorter.get_ready())
# Filter to only include nodes that are in our nodes_to_build list and not already built
buildable_nodes: DependencyNodeList = [
node for node in ready_nodes
if node in nodes_to_build and not self.is_node_built(node)
]
if buildable_nodes:
logger.info(
"ready to build from build dependency sorter: %s",
sorted(n.key for n in buildable_nodes),
)
return buildable_nodes
def _filter_for_exclusive_builds(
self, buildable_nodes: DependencyNodeList
) -> DependencyNodeList:
"""Filter buildable nodes to handle exclusive build requirements."""
# Check if any buildable node requires exclusive build (exclusive_build == True)
exclusive_nodes: DependencyNodeList = [
node
for node in buildable_nodes
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build
]
if exclusive_nodes:
# Only build the first exclusive node this round
filtered_nodes = [exclusive_nodes[0]]
logger.info(
f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
)
return filtered_nodes
return buildable_nodes
def get_nodes_ready_to_build(self) -> DependencyNodeList:
"""Get the list of nodes that are ready to be built in this round."""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Handle exclusive builds
buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)
return buildable_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes) if ready_install_nodes else "none"
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get nodes that are ready to be built in this round
buildable_nodes = build_manager.get_nodes_ready_to_build()
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_build_dependency_topological_sorter()
)
self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_install_dependency_topological_sorter()
)
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built using the build dependency topological sorter."""
# Get nodes that are ready to build from the build dependency sorter
ready_nodes = list(self.build_sorter.get_ready())
# Filter to only include nodes that are in our nodes_to_build list and not already built
buildable_nodes: DependencyNodeList = [
node for node in ready_nodes
if node in nodes_to_build and not self.is_node_built(node)
]
if buildable_nodes:
logger.info(
"ready to build from build dependency sorter: %s",
sorted(n.key for n in buildable_nodes),
)
return buildable_nodes
def _filter_for_exclusive_builds(
self, buildable_nodes: DependencyNodeList
) -> DependencyNodeList:
"""Filter buildable nodes to handle exclusive build requirements."""
# Check if any buildable node requires exclusive build (exclusive_build == True)
exclusive_nodes: DependencyNodeList = [
node
for node in buildable_nodes
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build
]
if exclusive_nodes:
# Only build the first exclusive node this round
filtered_nodes = [exclusive_nodes[0]]
logger.info(
f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
)
return filtered_nodes
return buildable_nodes
def get_nodes_ready_to_build(self) -> DependencyNodeList:
"""Get the list of nodes that are ready to be built in this round."""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Handle exclusive builds
buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)
return buildable_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes) if ready_install_nodes else "none"
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get nodes that are ready to be built in this round
buildable_nodes = build_manager.get_nodes_ready_to_build()
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_build_dependency_topological_sorter()
)
self.install_sorter: graphlib.TopologicalSorter[dependency_graph.DependencyNode] = (
graph.get_install_dependency_topological_sorter()
)
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built using the build dependency topological sorter."""
# Get nodes that are ready to build from the build dependency sorter
ready_nodes = list(self.build_sorter.get_ready())
# Filter to only include nodes that are in our nodes_to_build list and not already built
buildable_nodes: DependencyNodeList = [
node for node in ready_nodes
if node in nodes_to_build and not self.is_node_built(node)
]
if buildable_nodes:
logger.info(
"ready to build from build dependency sorter: %s",
sorted(n.key for n in buildable_nodes),
)
return buildable_nodes
def _filter_for_exclusive_builds(
self, buildable_nodes: DependencyNodeList
) -> DependencyNodeList:
"""Filter buildable nodes to handle exclusive build requirements."""
# Check if any buildable node requires exclusive build (exclusive_build == True)
exclusive_nodes: DependencyNodeList = [
node
for node in buildable_nodes
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build
]
if exclusive_nodes:
# Only build the first exclusive node this round
filtered_nodes = [exclusive_nodes[0]]
logger.info(
f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
)
return filtered_nodes
return buildable_nodes
def get_nodes_ready_to_build(self) -> DependencyNodeList:
"""Get the list of nodes that are ready to be built in this round."""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Handle exclusive builds
buildable_nodes = self._filter_for_exclusive_builds(buildable_nodes)
return buildable_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes) if ready_install_nodes else "none"
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get nodes that are ready to be built in this round
buildable_nodes = build_manager.get_nodes_ready_to_build()
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Perfect! I've successfully updated the ParallelBuildManager
to use two topological sorters for managing the build order. Here's a summary of the changes:
- Added
build_sorter
usinggraph.get_build_dependency_topological_sorter()
- Added
install_sorter
usinggraph.get_install_dependency_topological_sorter()
- Both sorters are prepared and ready for use
- Replaced complex
_find_buildable_nodes
logic with simple call tobuild_sorter.get_ready()
- Nodes become ready to build when the build dependency graph says they are
- Much cleaner and more efficient than the previous manual dependency checking
- When a node is built, it's marked as
done()
in the install dependency graph - All nodes that become ready in the install dependency graph are then marked as
done()
in the build dependency graph - This implements the exact flow you specified
- Build Readiness: Nodes are ready to build when
build_sorter.get_ready()
returns them - Build Completion: When
mark_node_built()
is called:- Node is marked done in the install sorter
- Any install dependencies that become ready are marked done in the build sorter
- This cascades the completion through both dependency graphs
- Correctness: Uses proper topological sorting to ensure correct build order
- Efficiency: No more manual dependency traversal and checking
- Maintainability: Much simpler logic that's easier to understand and debug
- Scalability: Handles complex dependency graphs efficiently
The implementation now properly manages both build-time and install-time dependencies using the topological sorters, ensuring that packages are built in the correct order while respecting both types of dependencies.
User
Change get_nodes_ready_to_build to be a generator returning DependencyNodeLists. Organize the buildable_nodes into separate lists based on whether the packages are exclusive or not. Each exclusive build node should be in its own list. All non-exclusive packages should be in 1 list together.
Cursor
I need to update the get_nodes_ready_to_build
method to be a generator that yields separate lists based on exclusive build requirements. Let me examine the current method and then refactor it:
Now let me create a todo list and update the method:
Now I'll refactor the method to be a generator that organizes nodes by exclusivity:
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
self.install_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_install_dependency_topological_sorter()
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built using the build dependency topological sorter."""
# Get nodes that are ready to build from the build dependency sorter
ready_nodes = list(self.build_sorter.get_ready())
# Filter to only include nodes that are in our nodes_to_build list and not already built
buildable_nodes: DependencyNodeList = [
node
for node in ready_nodes
if node in nodes_to_build and not self.is_node_built(node)
]
if buildable_nodes:
logger.info(
"ready to build from build dependency sorter: %s",
sorted(n.key for n in buildable_nodes),
)
return buildable_nodes
def _filter_for_exclusive_builds(
self, buildable_nodes: DependencyNodeList
) -> DependencyNodeList:
"""Filter buildable nodes to handle exclusive build requirements."""
# Check if any buildable node requires exclusive build (exclusive_build == True)
exclusive_nodes: DependencyNodeList = [
node
for node in buildable_nodes
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build
]
if exclusive_nodes:
# Only build the first exclusive node this round
filtered_nodes = [exclusive_nodes[0]]
logger.info(
f"{exclusive_nodes[0].canonicalized_name}: requires exclusive build, running it alone this round."
)
return filtered_nodes
return buildable_nodes
def get_nodes_ready_to_build(self) -> typing.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Separate exclusive and non-exclusive nodes
exclusive_nodes: DependencyNodeList = []
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
exclusive_nodes.append(node)
else:
non_exclusive_nodes.append(node)
# Yield each exclusive node in its own list
for exclusive_node in exclusive_nodes:
logger.info(
f"{exclusive_node.canonicalized_name}: requires exclusive build, yielding it alone"
)
yield [exclusive_node]
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"yielding non-exclusive nodes together: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes)
if ready_install_nodes
else "none",
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get nodes that are ready to be built in this round
buildable_nodes = build_manager.get_nodes_ready_to_build()
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now I need to remove the old _filter_for_exclusive_builds
method since it's no longer needed, and update the calling code to handle the generator:
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
self.install_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_install_dependency_topological_sorter()
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built using the build dependency topological sorter."""
# Get nodes that are ready to build from the build dependency sorter
ready_nodes = list(self.build_sorter.get_ready())
# Filter to only include nodes that are in our nodes_to_build list and not already built
buildable_nodes: DependencyNodeList = [
node
for node in ready_nodes
if node in nodes_to_build and not self.is_node_built(node)
]
if buildable_nodes:
logger.info(
"ready to build from build dependency sorter: %s",
sorted(n.key for n in buildable_nodes),
)
return buildable_nodes
def get_nodes_ready_to_build(self) -> typing.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Separate exclusive and non-exclusive nodes
exclusive_nodes: DependencyNodeList = []
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
exclusive_nodes.append(node)
else:
non_exclusive_nodes.append(node)
# Yield each exclusive node in its own list
for exclusive_node in exclusive_nodes:
logger.info(
f"{exclusive_node.canonicalized_name}: requires exclusive build, yielding it alone"
)
yield [exclusive_node]
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"yielding non-exclusive nodes together: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes)
if ready_install_nodes
else "none",
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get nodes that are ready to be built in this round
buildable_nodes = build_manager.get_nodes_ready_to_build()
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now I need to find and update the calling code that uses get_nodes_ready_to_build
. Let me look at the build_parallel function:
Now I need to update the calling code to handle the generator. The logic should iterate over each batch of nodes yielded by the generator:
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
self.install_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_install_dependency_topological_sorter()
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built using the build dependency topological sorter."""
# Get nodes that are ready to build from the build dependency sorter
ready_nodes = list(self.build_sorter.get_ready())
# Filter to only include nodes that are in our nodes_to_build list and not already built
buildable_nodes: DependencyNodeList = [
node
for node in ready_nodes
if node in nodes_to_build and not self.is_node_built(node)
]
if buildable_nodes:
logger.info(
"ready to build from build dependency sorter: %s",
sorted(n.key for n in buildable_nodes),
)
return buildable_nodes
def get_nodes_ready_to_build(self) -> typing.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Separate exclusive and non-exclusive nodes
exclusive_nodes: DependencyNodeList = []
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
exclusive_nodes.append(node)
else:
non_exclusive_nodes.append(node)
# Yield each exclusive node in its own list
for exclusive_node in exclusive_nodes:
logger.info(
f"{exclusive_node.canonicalized_name}: requires exclusive build, yielding it alone"
)
yield [exclusive_node]
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"yielding non-exclusive nodes together: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes)
if ready_install_nodes
else "none",
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Let me check for any linting errors:
I need to fix the whitespace issue and update the import for the Generator type:
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
self.install_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_install_dependency_topological_sorter()
def _find_buildable_nodes(
self, nodes_to_build: DependencyNodeList
) -> DependencyNodeList:
"""Find nodes that can be built using the build dependency topological sorter."""
# Get nodes that are ready to build from the build dependency sorter
ready_nodes = list(self.build_sorter.get_ready())
# Filter to only include nodes that are in our nodes_to_build list and not already built
buildable_nodes: DependencyNodeList = [
node
for node in ready_nodes
if node in nodes_to_build and not self.is_node_built(node)
]
if buildable_nodes:
logger.info(
"ready to build from build dependency sorter: %s",
sorted(n.key for n in buildable_nodes),
)
return buildable_nodes
def get_nodes_ready_to_build(self) -> typing.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
buildable_nodes = self._find_buildable_nodes(self._remaining_nodes)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Separate exclusive and non-exclusive nodes
exclusive_nodes: DependencyNodeList = []
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
exclusive_nodes.append(node)
else:
non_exclusive_nodes.append(node)
# Yield each exclusive node in its own list
for exclusive_node in exclusive_nodes:
logger.info(
f"{exclusive_node.canonicalized_name}: requires exclusive build, yielding it alone"
)
yield [exclusive_node]
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"yielding non-exclusive nodes together: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes)
if ready_install_nodes
else "none",
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s", sorted(n.key for n in buildable_nodes)
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Perfect! I've successfully refactored the get_nodes_ready_to_build
method to be a generator that organizes buildable nodes by exclusivity. Here's a summary of the changes:
- Updated return type to
typing.Generator[DependencyNodeList, None, None]
- Method now yields separate lists instead of returning a single list
- Exclusive nodes: Each exclusive build node is yielded in its own separate list
- Non-exclusive nodes: All non-exclusive nodes are yielded together in a single list
- This ensures exclusive builds run alone while allowing parallel builds for non-exclusive packages
- Eliminated the
_filter_for_exclusive_builds
method since the logic is now integrated into the generator - Cleaner separation of concerns
- Modified the build loop to iterate over each batch yielded by the generator
- Each batch is processed in its own ThreadPoolExecutor context
- Maintains the same parallel building behavior but respects exclusivity requirements
- Generator yields batches: Each call to
get_nodes_ready_to_build()
yields multiple batches - Exclusive builds: Each exclusive package gets its own batch (list with one item)
- Non-exclusive builds: All non-exclusive packages are in one batch together
- Sequential batch processing: Batches are processed sequentially, but within each batch, builds can run in parallel
- Proper isolation: Exclusive builds are guaranteed to run alone
- Flexible parallelism: Non-exclusive packages can build in parallel while exclusive packages build alone
- Better resource management: Each batch gets its own ThreadPoolExecutor
- Cleaner code: Logic is more organized and easier to understand
- Maintains correctness: Respects both dependency ordering and exclusivity requirements
The implementation now properly handles the complex requirements of managing both build dependencies through topological sorting and exclusive build constraints through batch organization.
User
The _find_buildable_nodes method is redundant. Move the logic inline in get_nodes_ready_to_build(). Remove the extra filter step, that is redundant.
Cursor
You're absolutely right! The _find_buildable_nodes
method is redundant since we're already using the topological sorter. Let me inline the logic and remove the redundant filtering:
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
buildable_nodes = list(self.build_sorter.get_ready())
Let me read the current state of the file to see the exact content:
Now I'll remove the _find_buildable_nodes
method and inline the logic, removing the redundant filtering:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
self.install_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_install_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
buildable_nodes = list(self.build_sorter.get_ready())
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Separate exclusive and non-exclusive nodes
exclusive_nodes: DependencyNodeList = []
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
exclusive_nodes.append(node)
else:
non_exclusive_nodes.append(node)
# Yield each exclusive node in its own list
for exclusive_node in exclusive_nodes:
logger.info(
f"{exclusive_node.canonicalized_name}: requires exclusive build, yielding it alone"
)
yield [exclusive_node]
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"yielding non-exclusive nodes together: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes)
if ready_install_nodes
else "none",
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Perfect! I've successfully removed the redundant _find_buildable_nodes
method and inlined the logic. The key improvements are:
- Eliminated
_find_buildable_nodes
method entirely - Moved the core logic directly into
get_nodes_ready_to_build
- No longer filtering by
nodes_to_build
(which was justself._remaining_nodes
) - No longer filtering by
self.is_node_built(node)
since the topological sorter already handles this correctly - The topological sorter's
get_ready()
method already returns only nodes that are ready to build and haven't been marked as done
- Direct call to
self.build_sorter.get_ready()
- Much cleaner and more efficient
- TopologicalSorter handles state: The sorter already knows which nodes are done (via
done()
calls) and which are ready - No double filtering: We were redundantly checking if nodes were built when the sorter already manages this state
- Cleaner code: Single responsibility - the sorter manages readiness, the generator manages batching by exclusivity
- Better performance: Eliminates unnecessary list comprehensions and filtering
The implementation is now much more streamlined and relies properly on the topological sorter to manage the build state, while the generator focuses solely on organizing nodes by exclusivity requirements.
User
Instead of iterating over the buildable_nodes and then again over the exclusive nodes, yield the list with each exclusive node as it is encountered.
Cursor
You're right! Instead of collecting all nodes first and then iterating again, I should yield each exclusive node immediately as it's encountered. Let me refactor the logic:
Now I'll refactor to yield exclusive nodes immediately as they're encountered:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
self.install_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_install_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
buildable_nodes = list(self.build_sorter.get_ready())
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(
f"{node.canonicalized_name}: requires exclusive build, yielding it alone"
)
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"yielding non-exclusive nodes together: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done in build sorter: %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes)
if ready_install_nodes
else "none",
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Perfect! I've refactored the logic to be much more efficient. Here are the key improvements:
- Now iterates through
buildable_nodes
only once - Yields exclusive nodes immediately as they're encountered
- Only collects non-exclusive nodes for later yielding
- Removed the
exclusive_nodes
list entirely - No longer need to iterate over exclusive nodes separately
- Only maintains
non_exclusive_nodes
list
- Exclusive nodes are yielded as soon as they're identified
- No need to collect them first and then iterate again
- More memory efficient and responsive
- Better Performance: Single iteration instead of double iteration
- Lower Memory Usage: Don't need to store exclusive nodes in a separate list
- More Responsive: Exclusive builds can start immediately when encountered
- Cleaner Code: Simpler logic flow with fewer intermediate collections
The refactored method now processes nodes in a single pass, yielding exclusive nodes immediately and collecting non-exclusive nodes for a final batch yield. This is much more efficient than the previous approach of collecting everything first and then iterating again.
User
Run the linter and mypy
Cursor
I'll run the linter and mypy to check for any issues in the code.
Excellent! Both the linter and mypy checks passed successfully:
- Ruff check: All checks passed!
- Ruff format: 80 files already formatted
- Mergify lint: All expected jobs found and configured correctly
- Source files: Success: no issues found in 49 source files
- Test files: Success: no issues found in 31 source files
The code changes we made are clean and properly typed:
- ✅ Added topological sorters to DependencyGraph - properly typed with
graphlib.TopologicalSorter[DependencyNode]
- ✅ Updated ParallelBuildManager - uses the new sorters correctly
- ✅ Refactored build logic - simplified and more efficient
- ✅ Generator implementation - properly typed with
collections.abc.Generator
- ✅ Code style - follows project formatting standards
- ✅ Type annotations - all types are correctly specified
All the refactoring work has been completed successfully with no linting or type checking issues. The code is ready for use and maintains high code quality standards.
User
Run the unit tests
Cursor
I'll run the unit tests to make sure all our changes work correctly.
The unit tests are failing because the tests were written for the old API where get_nodes_ready_to_build()
returned a list, but now it returns a generator. I need to update the tests to handle the new generator-based API. Let me examine the test file and fix the issues:
The main issues are:
- Generator vs List: Tests expect a list but now get a generator
- TopologicalSorter.done() errors: Can't call
done()
on nodes that weren't returned byget_ready()
Let me examine the test file to understand the expected behavior:
I see the issue. The tests expect get_nodes_ready_to_build()
to return a single list of nodes, but now it returns a generator that yields multiple batches. I need to update the tests to handle this new behavior. Let me also check if there are issues with the TopologicalSorter usage:
The core issue is that the tests need to be updated to work with the new generator API. The tests are trying to:
- Get a list of buildable nodes (now it's a generator of batches)
- Call
mark_node_built()
on nodes that weren't returned byget_ready()
(TopologicalSorter constraint)
I need to create a helper function to collect all nodes from the generator and fix the TopologicalSorter usage. Let me update the tests:
from unittest.mock import Mock, patch
import pytest
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from packaging.version import Version
from fromager import context, dependency_graph, requirements_file
from fromager.commands.build import ParallelBuildManager
def create_test_graph():
"""Create a test dependency graph with various dependency scenarios.
Graph structure:
- app_main: Top-level app (depends on lib_shared for install, tool_build for build)
- lib_shared: Shared library (depends on util_base for install, no build deps)
- util_base: Base utility (no dependencies - leaf node)
- tool_build: Build tool (depends on util_base for build and install)
- plugin_extra: Plugin (depends on lib_shared for install, tool_build for build)
- helper_internal: Internal helper (only used by tool_build, not top-level)
Expected build order: util_base → helper_internal → tool_build → lib_shared → (app_main, plugin_extra)
"""
graph = dependency_graph.DependencyGraph()
# Add top-level dependencies (directly requested by user)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("app_main==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/app_main-1.0.tar.gz",
)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("plugin_extra==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/plugin_extra-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("app_main"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_SYSTEM,
req=Requirement("tool_build==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/tool_build-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("app_main"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("lib_shared"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("util_base==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/util_base-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("plugin_extra"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_BACKEND,
req=Requirement("tool_build==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/tool_build-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("plugin_extra"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
# tool_build build dependencies (not top-level, only used by others)
graph.add_dependency(
parent_name=canonicalize_name("tool_build"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_SYSTEM,
req=Requirement("util_base==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/util_base-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("tool_build"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("helper_internal==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/helper_internal-1.0.tar.gz",
)
# util_base has no dependencies (leaf node)
# helper_internal has no dependencies (leaf node, not top-level)
return graph
@pytest.fixture
def mock_context():
"""Create a mock WorkContext for testing."""
ctx = Mock(spec=context.WorkContext)
ctx.settings = Mock()
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = False
return mock_pbi
ctx.settings.package_build_info = mock_package_build_info
return ctx
@pytest.fixture
def test_graph():
"""Provide the test dependency graph."""
return create_test_graph()
@pytest.fixture
def build_manager(mock_context, test_graph):
"""Create a ParallelBuildManager instance for testing."""
return ParallelBuildManager(mock_context, test_graph)
class TestParallelBuildManager:
"""Test suite for ParallelBuildManager class."""
def test_initialization(self, build_manager, test_graph):
"""Test that the manager initializes correctly."""
assert build_manager.wkctx is not None
assert build_manager.graph == test_graph
assert len(build_manager.built_node_keys) == 0
assert len(build_manager._remaining_nodes) == 6 # 6 non-root nodes
assert build_manager.have_remaining_nodes()
def test_have_remaining_nodes_initially_true(self, build_manager):
"""Test that initially there are remaining nodes."""
assert build_manager.have_remaining_nodes()
def test_have_remaining_nodes_false_when_all_built(self, build_manager, test_graph):
"""Test that have_remaining_nodes returns False when all nodes are built."""
# Mark all nodes as built
for node in test_graph.nodes.values():
if node.key != dependency_graph.ROOT:
build_manager.mark_node_built(node)
assert not build_manager.have_remaining_nodes()
def test_mark_node_built(self, build_manager, test_graph):
"""Test marking nodes as built."""
util_base = test_graph.nodes["util-base==1.0"]
assert not build_manager.is_node_built(util_base)
assert util_base.key not in build_manager.built_node_keys
assert util_base in build_manager._remaining_nodes
build_manager.mark_node_built(util_base)
# Verify node is now marked as built
assert build_manager.is_node_built(util_base)
assert util_base.key in build_manager.built_node_keys
assert util_base not in build_manager._remaining_nodes
def test_nodes_with_no_dependencies_buildable_first(self, build_manager):
"""Test that leaf nodes (no dependencies) are buildable first."""
# Collect all nodes from all batches
all_buildable_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_buildable_nodes.extend(batch)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert (
len(all_buildable_nodes) == 3
) # util-base, helper-internal, and lib-shared (no build deps)
assert "util-base" in buildable_names
assert "helper-internal" in buildable_names
assert "lib-shared" in buildable_names
def test_node_with_build_dependencies(self, build_manager, test_graph):
"""Test that tool_build becomes buildable after its dependencies are built."""
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "util-base" in buildable_names
assert "helper-internal" in buildable_names
assert "tool-build" not in buildable_names
util_base = test_graph.nodes["util-base==1.0"]
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(util_base)
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
def test_transitive_dependencies(self, build_manager, test_graph):
"""Test that transitive dependencies are handled correctly.
app_main depends on tool_build for build, and tool_build depends on helper_internal for install.
So app_main should only be buildable after util_base, helper_internal, and tool_build are built.
"""
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" not in buildable_names
assert (
"helper-internal" in buildable_names
) # helper_internal should still be buildable
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
assert "app-main" not in buildable_names
tool_build = test_graph.nodes["tool-build==1.0"]
build_manager.mark_node_built(tool_build)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "lib-shared" in buildable_names
def test_multiple_build_dependencies(self, build_manager, test_graph):
"""Test that app_main waits for all its dependencies."""
# app_main depends on tool_build for build and lib_shared for install
# tool_build depends on util_base and helper_internal
# lib_shared depends on util_base
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" not in buildable_names
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
tool_build = test_graph.nodes["tool-build==1.0"]
build_manager.mark_node_built(tool_build)
lib_shared = test_graph.nodes["lib-shared==1.0"]
build_manager.mark_node_built(lib_shared)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" in buildable_names
def test_circular_dependency_detection(self, build_manager, test_graph):
"""Test that circular dependencies are detected."""
# Create a scenario where no nodes can be built but some remain
# Mark all nodes as remaining but create impossible conditions
# Remove all leaf nodes from remaining nodes but don't mark them as built
# This simulates a circular dependency scenario where no nodes can be built
build_manager._remaining_nodes = [
node
for node in build_manager._remaining_nodes
if node.canonicalized_name
not in ["util-base", "helper-internal", "lib-shared"]
]
with pytest.raises(ValueError, match="Circular dependency detected"):
build_manager.get_nodes_ready_to_build()
@patch("fromager.commands.build.logger")
def test_logging_messages(self, mock_logger, build_manager, test_graph):
"""Test that appropriate logging messages are generated."""
build_manager.get_nodes_ready_to_build()
mock_logger.info.assert_called()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
assert any("ready to build" in msg for msg in log_calls)
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
mock_logger.reset_mock()
build_manager.get_nodes_ready_to_build()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
assert any("waiting for" in msg for msg in log_calls)
class TestExclusiveBuildHandling:
"""Test suite specifically for exclusive build handling."""
def create_exclusive_build_graph(self):
"""Create a graph where some nodes require exclusive builds."""
graph = dependency_graph.DependencyGraph()
for name in ["normal_a", "normal_b", "exclusive_c"]:
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement(f"{name}==1.0"),
req_version=Version("1.0"),
download_url=f"http://example.com/{name}-1.0.tar.gz",
)
return graph
@pytest.fixture
def exclusive_build_manager(self, mock_context):
"""Create a manager with exclusive build settings."""
graph = self.create_exclusive_build_graph()
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = name == "exclusive-c"
return mock_pbi
mock_context.settings.package_build_info = mock_package_build_info
return ParallelBuildManager(mock_context, graph)
def test_exclusive_build_isolation(self, exclusive_build_manager):
"""Test that exclusive build nodes are not mixed with other nodes."""
buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
assert len(buildable_nodes) == 1
assert buildable_nodes[0].canonicalized_name == "exclusive-c"
def test_normal_nodes_built_together_when_no_exclusive(
self, exclusive_build_manager
):
"""Test that normal nodes can be built together when no exclusive nodes are ready."""
exclusive_node = None
for node in exclusive_build_manager._remaining_nodes:
if node.canonicalized_name == "exclusive-c":
exclusive_node = node
break
assert exclusive_node is not None
exclusive_build_manager.mark_node_built(exclusive_node)
buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert len(buildable_nodes) == 2
assert "normal-a" in buildable_names
assert "normal-b" in buildable_names
assert "exclusive-c" not in buildable_names
def test_multiple_exclusive_nodes_only_first_selected(self, mock_context):
"""Test that when multiple exclusive nodes are ready, only the first is selected."""
graph = dependency_graph.DependencyGraph()
for name in ["exclusive_a", "exclusive_b", "normal_c"]:
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement(f"{name}==1.0"),
req_version=Version("1.0"),
download_url=f"http://example.com/{name}-1.0.tar.gz",
)
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = name.startswith("exclusive-")
return mock_pbi
mock_context.settings.package_build_info = mock_package_build_info
manager = ParallelBuildManager(mock_context, graph)
buildable_nodes = manager.get_nodes_ready_to_build()
assert len(buildable_nodes) == 1
assert buildable_nodes[0].canonicalized_name.startswith("exclusive-")
@patch("fromager.commands.build.logger")
def test_exclusive_build_logging(self, mock_logger, exclusive_build_manager):
"""Test that exclusive build scenarios are logged appropriately."""
exclusive_build_manager.get_nodes_ready_to_build()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
exclusive_log_found = any(
"requires exclusive build" in msg for msg in log_calls
)
assert exclusive_log_found
Now let me continue updating the other test methods. The key insight is that I need to fix the TopologicalSorter issue - we can only call done()
on nodes that were returned by get_ready()
. Let me create a helper method to get ready nodes and mark them as built properly:
from unittest.mock import Mock, patch
import pytest
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from packaging.version import Version
from fromager import context, dependency_graph, requirements_file
from fromager.commands.build import ParallelBuildManager
def create_test_graph():
"""Create a test dependency graph with various dependency scenarios.
Graph structure:
- app_main: Top-level app (depends on lib_shared for install, tool_build for build)
- lib_shared: Shared library (depends on util_base for install, no build deps)
- util_base: Base utility (no dependencies - leaf node)
- tool_build: Build tool (depends on util_base for build and install)
- plugin_extra: Plugin (depends on lib_shared for install, tool_build for build)
- helper_internal: Internal helper (only used by tool_build, not top-level)
Expected build order: util_base → helper_internal → tool_build → lib_shared → (app_main, plugin_extra)
"""
graph = dependency_graph.DependencyGraph()
# Add top-level dependencies (directly requested by user)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("app_main==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/app_main-1.0.tar.gz",
)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("plugin_extra==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/plugin_extra-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("app_main"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_SYSTEM,
req=Requirement("tool_build==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/tool_build-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("app_main"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("lib_shared"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("util_base==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/util_base-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("plugin_extra"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_BACKEND,
req=Requirement("tool_build==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/tool_build-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("plugin_extra"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
# tool_build build dependencies (not top-level, only used by others)
graph.add_dependency(
parent_name=canonicalize_name("tool_build"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_SYSTEM,
req=Requirement("util_base==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/util_base-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("tool_build"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("helper_internal==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/helper_internal-1.0.tar.gz",
)
# util_base has no dependencies (leaf node)
# helper_internal has no dependencies (leaf node, not top-level)
return graph
@pytest.fixture
def mock_context():
"""Create a mock WorkContext for testing."""
ctx = Mock(spec=context.WorkContext)
ctx.settings = Mock()
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = False
return mock_pbi
ctx.settings.package_build_info = mock_package_build_info
return ctx
@pytest.fixture
def test_graph():
"""Provide the test dependency graph."""
return create_test_graph()
@pytest.fixture
def build_manager(mock_context, test_graph):
"""Create a ParallelBuildManager instance for testing."""
return ParallelBuildManager(mock_context, test_graph)
class TestParallelBuildManager:
"""Test suite for ParallelBuildManager class."""
def test_initialization(self, build_manager, test_graph):
"""Test that the manager initializes correctly."""
assert build_manager.wkctx is not None
assert build_manager.graph == test_graph
assert len(build_manager.built_node_keys) == 0
assert len(build_manager._remaining_nodes) == 6 # 6 non-root nodes
assert build_manager.have_remaining_nodes()
def test_have_remaining_nodes_initially_true(self, build_manager):
"""Test that initially there are remaining nodes."""
assert build_manager.have_remaining_nodes()
def test_have_remaining_nodes_false_when_all_built(self, build_manager, test_graph):
"""Test that have_remaining_nodes returns False when all nodes are built."""
# Mark all nodes as built
for node in test_graph.nodes.values():
if node.key != dependency_graph.ROOT:
build_manager.mark_node_built(node)
assert not build_manager.have_remaining_nodes()
def test_mark_node_built(self, build_manager, test_graph):
"""Test marking nodes as built."""
util_base = test_graph.nodes["util-base==1.0"]
assert not build_manager.is_node_built(util_base)
assert util_base.key not in build_manager.built_node_keys
assert util_base in build_manager._remaining_nodes
build_manager.mark_node_built(util_base)
# Verify node is now marked as built
assert build_manager.is_node_built(util_base)
assert util_base.key in build_manager.built_node_keys
assert util_base not in build_manager._remaining_nodes
def test_nodes_with_no_dependencies_buildable_first(self, build_manager):
"""Test that leaf nodes (no dependencies) are buildable first."""
# Collect all nodes from all batches
all_buildable_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_buildable_nodes.extend(batch)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert (
len(all_buildable_nodes) == 3
) # util-base, helper-internal, and lib-shared (no build deps)
assert "util-base" in buildable_names
assert "helper-internal" in buildable_names
assert "lib-shared" in buildable_names
def test_node_with_build_dependencies(self, build_manager, test_graph):
"""Test that tool_build becomes buildable after its dependencies are built."""
# Collect all nodes from all batches
all_buildable_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_buildable_nodes.extend(batch)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert "util-base" in buildable_names
assert "helper-internal" in buildable_names
assert "tool-build" not in buildable_names
# Mark the ready nodes as built (only nodes returned by get_ready can be marked as done)
util_base = next(node for node in all_buildable_nodes if node.canonicalized_name == "util-base")
helper_internal = next(node for node in all_buildable_nodes if node.canonicalized_name == "helper-internal")
build_manager.mark_node_built(util_base)
build_manager.mark_node_built(helper_internal)
# Get next batch of buildable nodes
all_buildable_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_buildable_nodes.extend(batch)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert "tool-build" in buildable_names
def test_transitive_dependencies(self, build_manager, test_graph):
"""Test that transitive dependencies are handled correctly.
app_main depends on tool_build for build, and tool_build depends on helper_internal for install.
So app_main should only be buildable after util_base, helper_internal, and tool_build are built.
"""
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" not in buildable_names
assert (
"helper-internal" in buildable_names
) # helper_internal should still be buildable
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
assert "app-main" not in buildable_names
tool_build = test_graph.nodes["tool-build==1.0"]
build_manager.mark_node_built(tool_build)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "lib-shared" in buildable_names
def test_multiple_build_dependencies(self, build_manager, test_graph):
"""Test that app_main waits for all its dependencies."""
# app_main depends on tool_build for build and lib_shared for install
# tool_build depends on util_base and helper_internal
# lib_shared depends on util_base
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" not in buildable_names
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
tool_build = test_graph.nodes["tool-build==1.0"]
build_manager.mark_node_built(tool_build)
lib_shared = test_graph.nodes["lib-shared==1.0"]
build_manager.mark_node_built(lib_shared)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" in buildable_names
def test_circular_dependency_detection(self, build_manager, test_graph):
"""Test that circular dependencies are detected."""
# Create a scenario where no nodes can be built but some remain
# Mark all nodes as remaining but create impossible conditions
# Remove all leaf nodes from remaining nodes but don't mark them as built
# This simulates a circular dependency scenario where no nodes can be built
build_manager._remaining_nodes = [
node
for node in build_manager._remaining_nodes
if node.canonicalized_name
not in ["util-base", "helper-internal", "lib-shared"]
]
with pytest.raises(ValueError, match="Circular dependency detected"):
build_manager.get_nodes_ready_to_build()
@patch("fromager.commands.build.logger")
def test_logging_messages(self, mock_logger, build_manager, test_graph):
"""Test that appropriate logging messages are generated."""
build_manager.get_nodes_ready_to_build()
mock_logger.info.assert_called()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
assert any("ready to build" in msg for msg in log_calls)
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
mock_logger.reset_mock()
build_manager.get_nodes_ready_to_build()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
assert any("waiting for" in msg for msg in log_calls)
class TestExclusiveBuildHandling:
"""Test suite specifically for exclusive build handling."""
def create_exclusive_build_graph(self):
"""Create a graph where some nodes require exclusive builds."""
graph = dependency_graph.DependencyGraph()
for name in ["normal_a", "normal_b", "exclusive_c"]:
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement(f"{name}==1.0"),
req_version=Version("1.0"),
download_url=f"http://example.com/{name}-1.0.tar.gz",
)
return graph
@pytest.fixture
def exclusive_build_manager(self, mock_context):
"""Create a manager with exclusive build settings."""
graph = self.create_exclusive_build_graph()
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = name == "exclusive-c"
return mock_pbi
mock_context.settings.package_build_info = mock_package_build_info
return ParallelBuildManager(mock_context, graph)
def test_exclusive_build_isolation(self, exclusive_build_manager):
"""Test that exclusive build nodes are not mixed with other nodes."""
buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
assert len(buildable_nodes) == 1
assert buildable_nodes[0].canonicalized_name == "exclusive-c"
def test_normal_nodes_built_together_when_no_exclusive(
self, exclusive_build_manager
):
"""Test that normal nodes can be built together when no exclusive nodes are ready."""
exclusive_node = None
for node in exclusive_build_manager._remaining_nodes:
if node.canonicalized_name == "exclusive-c":
exclusive_node = node
break
assert exclusive_node is not None
exclusive_build_manager.mark_node_built(exclusive_node)
buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert len(buildable_nodes) == 2
assert "normal-a" in buildable_names
assert "normal-b" in buildable_names
assert "exclusive-c" not in buildable_names
def test_multiple_exclusive_nodes_only_first_selected(self, mock_context):
"""Test that when multiple exclusive nodes are ready, only the first is selected."""
graph = dependency_graph.DependencyGraph()
for name in ["exclusive_a", "exclusive_b", "normal_c"]:
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement(f"{name}==1.0"),
req_version=Version("1.0"),
download_url=f"http://example.com/{name}-1.0.tar.gz",
)
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = name.startswith("exclusive-")
return mock_pbi
mock_context.settings.package_build_info = mock_package_build_info
manager = ParallelBuildManager(mock_context, graph)
buildable_nodes = manager.get_nodes_ready_to_build()
assert len(buildable_nodes) == 1
assert buildable_nodes[0].canonicalized_name.startswith("exclusive-")
@patch("fromager.commands.build.logger")
def test_exclusive_build_logging(self, mock_logger, exclusive_build_manager):
"""Test that exclusive build scenarios are logged appropriately."""
exclusive_build_manager.get_nodes_ready_to_build()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
exclusive_log_found = any(
"requires exclusive build" in msg for msg in log_calls
)
assert exclusive_log_found
This is going to be quite a lot of test updates. Let me create a helper function to make this easier and then update all the failing tests systematically. First, let me add a helper function at the top of the test class:
Let me add a helper method to the test class:
from unittest.mock import Mock, patch
import pytest
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from packaging.version import Version
from fromager import context, dependency_graph, requirements_file
from fromager.commands.build import ParallelBuildManager
def create_test_graph():
"""Create a test dependency graph with various dependency scenarios.
Graph structure:
- app_main: Top-level app (depends on lib_shared for install, tool_build for build)
- lib_shared: Shared library (depends on util_base for install, no build deps)
- util_base: Base utility (no dependencies - leaf node)
- tool_build: Build tool (depends on util_base for build and install)
- plugin_extra: Plugin (depends on lib_shared for install, tool_build for build)
- helper_internal: Internal helper (only used by tool_build, not top-level)
Expected build order: util_base → helper_internal → tool_build → lib_shared → (app_main, plugin_extra)
"""
graph = dependency_graph.DependencyGraph()
# Add top-level dependencies (directly requested by user)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("app_main==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/app_main-1.0.tar.gz",
)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("plugin_extra==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/plugin_extra-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("app_main"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_SYSTEM,
req=Requirement("tool_build==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/tool_build-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("app_main"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("lib_shared"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("util_base==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/util_base-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("plugin_extra"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_BACKEND,
req=Requirement("tool_build==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/tool_build-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("plugin_extra"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
# tool_build build dependencies (not top-level, only used by others)
graph.add_dependency(
parent_name=canonicalize_name("tool_build"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_SYSTEM,
req=Requirement("util_base==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/util_base-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("tool_build"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("helper_internal==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/helper_internal-1.0.tar.gz",
)
# util_base has no dependencies (leaf node)
# helper_internal has no dependencies (leaf node, not top-level)
return graph
@pytest.fixture
def mock_context():
"""Create a mock WorkContext for testing."""
ctx = Mock(spec=context.WorkContext)
ctx.settings = Mock()
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = False
return mock_pbi
ctx.settings.package_build_info = mock_package_build_info
return ctx
@pytest.fixture
def test_graph():
"""Provide the test dependency graph."""
return create_test_graph()
@pytest.fixture
def build_manager(mock_context, test_graph):
"""Create a ParallelBuildManager instance for testing."""
return ParallelBuildManager(mock_context, test_graph)
class TestParallelBuildManager:
"""Test suite for ParallelBuildManager class."""
def _get_all_buildable_nodes(self, build_manager):
"""Helper to collect all nodes from the generator."""
all_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_nodes.extend(batch)
return all_nodes
def test_initialization(self, build_manager, test_graph):
"""Test that the manager initializes correctly."""
assert build_manager.wkctx is not None
assert build_manager.graph == test_graph
assert len(build_manager.built_node_keys) == 0
assert len(build_manager._remaining_nodes) == 6 # 6 non-root nodes
assert build_manager.have_remaining_nodes()
def test_have_remaining_nodes_initially_true(self, build_manager):
"""Test that initially there are remaining nodes."""
assert build_manager.have_remaining_nodes()
def test_have_remaining_nodes_false_when_all_built(self, build_manager, test_graph):
"""Test that have_remaining_nodes returns False when all nodes are built."""
# Mark all nodes as built
for node in test_graph.nodes.values():
if node.key != dependency_graph.ROOT:
build_manager.mark_node_built(node)
assert not build_manager.have_remaining_nodes()
def test_mark_node_built(self, build_manager, test_graph):
"""Test marking nodes as built."""
util_base = test_graph.nodes["util-base==1.0"]
assert not build_manager.is_node_built(util_base)
assert util_base.key not in build_manager.built_node_keys
assert util_base in build_manager._remaining_nodes
build_manager.mark_node_built(util_base)
# Verify node is now marked as built
assert build_manager.is_node_built(util_base)
assert util_base.key in build_manager.built_node_keys
assert util_base not in build_manager._remaining_nodes
def test_nodes_with_no_dependencies_buildable_first(self, build_manager):
"""Test that leaf nodes (no dependencies) are buildable first."""
# Collect all nodes from all batches
all_buildable_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_buildable_nodes.extend(batch)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert (
len(all_buildable_nodes) == 3
) # util-base, helper-internal, and lib-shared (no build deps)
assert "util-base" in buildable_names
assert "helper-internal" in buildable_names
assert "lib-shared" in buildable_names
def test_node_with_build_dependencies(self, build_manager, test_graph):
"""Test that tool_build becomes buildable after its dependencies are built."""
# Collect all nodes from all batches
all_buildable_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_buildable_nodes.extend(batch)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert "util-base" in buildable_names
assert "helper-internal" in buildable_names
assert "tool-build" not in buildable_names
# Mark the ready nodes as built (only nodes returned by get_ready can be marked as done)
util_base = next(node for node in all_buildable_nodes if node.canonicalized_name == "util-base")
helper_internal = next(node for node in all_buildable_nodes if node.canonicalized_name == "helper-internal")
build_manager.mark_node_built(util_base)
build_manager.mark_node_built(helper_internal)
# Get next batch of buildable nodes
all_buildable_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_buildable_nodes.extend(batch)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert "tool-build" in buildable_names
def test_transitive_dependencies(self, build_manager, test_graph):
"""Test that transitive dependencies are handled correctly.
app_main depends on tool_build for build, and tool_build depends on helper_internal for install.
So app_main should only be buildable after util_base, helper_internal, and tool_build are built.
"""
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" not in buildable_names
assert (
"helper-internal" in buildable_names
) # helper_internal should still be buildable
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
assert "app-main" not in buildable_names
tool_build = test_graph.nodes["tool-build==1.0"]
build_manager.mark_node_built(tool_build)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "lib-shared" in buildable_names
def test_multiple_build_dependencies(self, build_manager, test_graph):
"""Test that app_main waits for all its dependencies."""
# app_main depends on tool_build for build and lib_shared for install
# tool_build depends on util_base and helper_internal
# lib_shared depends on util_base
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" not in buildable_names
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
tool_build = test_graph.nodes["tool-build==1.0"]
build_manager.mark_node_built(tool_build)
lib_shared = test_graph.nodes["lib-shared==1.0"]
build_manager.mark_node_built(lib_shared)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" in buildable_names
def test_circular_dependency_detection(self, build_manager, test_graph):
"""Test that circular dependencies are detected."""
# Create a scenario where no nodes can be built but some remain
# Mark all nodes as remaining but create impossible conditions
# Remove all leaf nodes from remaining nodes but don't mark them as built
# This simulates a circular dependency scenario where no nodes can be built
build_manager._remaining_nodes = [
node
for node in build_manager._remaining_nodes
if node.canonicalized_name
not in ["util-base", "helper-internal", "lib-shared"]
]
with pytest.raises(ValueError, match="Circular dependency detected"):
build_manager.get_nodes_ready_to_build()
@patch("fromager.commands.build.logger")
def test_logging_messages(self, mock_logger, build_manager, test_graph):
"""Test that appropriate logging messages are generated."""
build_manager.get_nodes_ready_to_build()
mock_logger.info.assert_called()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
assert any("ready to build" in msg for msg in log_calls)
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
mock_logger.reset_mock()
build_manager.get_nodes_ready_to_build()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
assert any("waiting for" in msg for msg in log_calls)
class TestExclusiveBuildHandling:
"""Test suite specifically for exclusive build handling."""
def create_exclusive_build_graph(self):
"""Create a graph where some nodes require exclusive builds."""
graph = dependency_graph.DependencyGraph()
for name in ["normal_a", "normal_b", "exclusive_c"]:
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement(f"{name}==1.0"),
req_version=Version("1.0"),
download_url=f"http://example.com/{name}-1.0.tar.gz",
)
return graph
@pytest.fixture
def exclusive_build_manager(self, mock_context):
"""Create a manager with exclusive build settings."""
graph = self.create_exclusive_build_graph()
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = name == "exclusive-c"
return mock_pbi
mock_context.settings.package_build_info = mock_package_build_info
return ParallelBuildManager(mock_context, graph)
def test_exclusive_build_isolation(self, exclusive_build_manager):
"""Test that exclusive build nodes are not mixed with other nodes."""
buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
assert len(buildable_nodes) == 1
assert buildable_nodes[0].canonicalized_name == "exclusive-c"
def test_normal_nodes_built_together_when_no_exclusive(
self, exclusive_build_manager
):
"""Test that normal nodes can be built together when no exclusive nodes are ready."""
exclusive_node = None
for node in exclusive_build_manager._remaining_nodes:
if node.canonicalized_name == "exclusive-c":
exclusive_node = node
break
assert exclusive_node is not None
exclusive_build_manager.mark_node_built(exclusive_node)
buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert len(buildable_nodes) == 2
assert "normal-a" in buildable_names
assert "normal-b" in buildable_names
assert "exclusive-c" not in buildable_names
def test_multiple_exclusive_nodes_only_first_selected(self, mock_context):
"""Test that when multiple exclusive nodes are ready, only the first is selected."""
graph = dependency_graph.DependencyGraph()
for name in ["exclusive_a", "exclusive_b", "normal_c"]:
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement(f"{name}==1.0"),
req_version=Version("1.0"),
download_url=f"http://example.com/{name}-1.0.tar.gz",
)
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = name.startswith("exclusive-")
return mock_pbi
mock_context.settings.package_build_info = mock_package_build_info
manager = ParallelBuildManager(mock_context, graph)
buildable_nodes = manager.get_nodes_ready_to_build()
assert len(buildable_nodes) == 1
assert buildable_nodes[0].canonicalized_name.startswith("exclusive-")
@patch("fromager.commands.build.logger")
def test_exclusive_build_logging(self, mock_logger, exclusive_build_manager):
"""Test that exclusive build scenarios are logged appropriately."""
exclusive_build_manager.get_nodes_ready_to_build()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
exclusive_log_found = any(
"requires exclusive build" in msg for msg in log_calls
)
assert exclusive_log_found
Now let me update the first test to use the helper:
from unittest.mock import Mock, patch
import pytest
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from packaging.version import Version
from fromager import context, dependency_graph, requirements_file
from fromager.commands.build import ParallelBuildManager
def create_test_graph():
"""Create a test dependency graph with various dependency scenarios.
Graph structure:
- app_main: Top-level app (depends on lib_shared for install, tool_build for build)
- lib_shared: Shared library (depends on util_base for install, no build deps)
- util_base: Base utility (no dependencies - leaf node)
- tool_build: Build tool (depends on util_base for build and install)
- plugin_extra: Plugin (depends on lib_shared for install, tool_build for build)
- helper_internal: Internal helper (only used by tool_build, not top-level)
Expected build order: util_base → helper_internal → tool_build → lib_shared → (app_main, plugin_extra)
"""
graph = dependency_graph.DependencyGraph()
# Add top-level dependencies (directly requested by user)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("app_main==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/app_main-1.0.tar.gz",
)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("plugin_extra==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/plugin_extra-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("app_main"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_SYSTEM,
req=Requirement("tool_build==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/tool_build-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("app_main"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("lib_shared"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("util_base==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/util_base-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("plugin_extra"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_BACKEND,
req=Requirement("tool_build==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/tool_build-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("plugin_extra"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
# tool_build build dependencies (not top-level, only used by others)
graph.add_dependency(
parent_name=canonicalize_name("tool_build"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_SYSTEM,
req=Requirement("util_base==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/util_base-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("tool_build"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("helper_internal==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/helper_internal-1.0.tar.gz",
)
# util_base has no dependencies (leaf node)
# helper_internal has no dependencies (leaf node, not top-level)
return graph
@pytest.fixture
def mock_context():
"""Create a mock WorkContext for testing."""
ctx = Mock(spec=context.WorkContext)
ctx.settings = Mock()
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = False
return mock_pbi
ctx.settings.package_build_info = mock_package_build_info
return ctx
@pytest.fixture
def test_graph():
"""Provide the test dependency graph."""
return create_test_graph()
@pytest.fixture
def build_manager(mock_context, test_graph):
"""Create a ParallelBuildManager instance for testing."""
return ParallelBuildManager(mock_context, test_graph)
class TestParallelBuildManager:
"""Test suite for ParallelBuildManager class."""
def _get_all_buildable_nodes(self, build_manager):
"""Helper to collect all nodes from the generator."""
all_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_nodes.extend(batch)
return all_nodes
def test_initialization(self, build_manager, test_graph):
"""Test that the manager initializes correctly."""
assert build_manager.wkctx is not None
assert build_manager.graph == test_graph
assert len(build_manager.built_node_keys) == 0
assert len(build_manager._remaining_nodes) == 6 # 6 non-root nodes
assert build_manager.have_remaining_nodes()
def test_have_remaining_nodes_initially_true(self, build_manager):
"""Test that initially there are remaining nodes."""
assert build_manager.have_remaining_nodes()
def test_have_remaining_nodes_false_when_all_built(self, build_manager, test_graph):
"""Test that have_remaining_nodes returns False when all nodes are built."""
# Mark all nodes as built
for node in test_graph.nodes.values():
if node.key != dependency_graph.ROOT:
build_manager.mark_node_built(node)
assert not build_manager.have_remaining_nodes()
def test_mark_node_built(self, build_manager, test_graph):
"""Test marking nodes as built."""
util_base = test_graph.nodes["util-base==1.0"]
assert not build_manager.is_node_built(util_base)
assert util_base.key not in build_manager.built_node_keys
assert util_base in build_manager._remaining_nodes
build_manager.mark_node_built(util_base)
# Verify node is now marked as built
assert build_manager.is_node_built(util_base)
assert util_base.key in build_manager.built_node_keys
assert util_base not in build_manager._remaining_nodes
def test_nodes_with_no_dependencies_buildable_first(self, build_manager):
"""Test that leaf nodes (no dependencies) are buildable first."""
all_buildable_nodes = self._get_all_buildable_nodes(build_manager)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert (
len(all_buildable_nodes) == 3
) # util-base, helper-internal, and lib-shared (no build deps)
assert "util-base" in buildable_names
assert "helper-internal" in buildable_names
assert "lib-shared" in buildable_names
def test_node_with_build_dependencies(self, build_manager, test_graph):
"""Test that tool_build becomes buildable after its dependencies are built."""
# Collect all nodes from all batches
all_buildable_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_buildable_nodes.extend(batch)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert "util-base" in buildable_names
assert "helper-internal" in buildable_names
assert "tool-build" not in buildable_names
# Mark the ready nodes as built (only nodes returned by get_ready can be marked as done)
util_base = next(node for node in all_buildable_nodes if node.canonicalized_name == "util-base")
helper_internal = next(node for node in all_buildable_nodes if node.canonicalized_name == "helper-internal")
build_manager.mark_node_built(util_base)
build_manager.mark_node_built(helper_internal)
# Get next batch of buildable nodes
all_buildable_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_buildable_nodes.extend(batch)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert "tool-build" in buildable_names
def test_transitive_dependencies(self, build_manager, test_graph):
"""Test that transitive dependencies are handled correctly.
app_main depends on tool_build for build, and tool_build depends on helper_internal for install.
So app_main should only be buildable after util_base, helper_internal, and tool_build are built.
"""
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" not in buildable_names
assert (
"helper-internal" in buildable_names
) # helper_internal should still be buildable
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
assert "app-main" not in buildable_names
tool_build = test_graph.nodes["tool-build==1.0"]
build_manager.mark_node_built(tool_build)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "lib-shared" in buildable_names
def test_multiple_build_dependencies(self, build_manager, test_graph):
"""Test that app_main waits for all its dependencies."""
# app_main depends on tool_build for build and lib_shared for install
# tool_build depends on util_base and helper_internal
# lib_shared depends on util_base
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" not in buildable_names
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
tool_build = test_graph.nodes["tool-build==1.0"]
build_manager.mark_node_built(tool_build)
lib_shared = test_graph.nodes["lib-shared==1.0"]
build_manager.mark_node_built(lib_shared)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" in buildable_names
def test_circular_dependency_detection(self, build_manager, test_graph):
"""Test that circular dependencies are detected."""
# Create a scenario where no nodes can be built but some remain
# Mark all nodes as remaining but create impossible conditions
# Remove all leaf nodes from remaining nodes but don't mark them as built
# This simulates a circular dependency scenario where no nodes can be built
build_manager._remaining_nodes = [
node
for node in build_manager._remaining_nodes
if node.canonicalized_name
not in ["util-base", "helper-internal", "lib-shared"]
]
with pytest.raises(ValueError, match="Circular dependency detected"):
build_manager.get_nodes_ready_to_build()
@patch("fromager.commands.build.logger")
def test_logging_messages(self, mock_logger, build_manager, test_graph):
"""Test that appropriate logging messages are generated."""
build_manager.get_nodes_ready_to_build()
mock_logger.info.assert_called()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
assert any("ready to build" in msg for msg in log_calls)
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
mock_logger.reset_mock()
build_manager.get_nodes_ready_to_build()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
assert any("waiting for" in msg for msg in log_calls)
class TestExclusiveBuildHandling:
"""Test suite specifically for exclusive build handling."""
def create_exclusive_build_graph(self):
"""Create a graph where some nodes require exclusive builds."""
graph = dependency_graph.DependencyGraph()
for name in ["normal_a", "normal_b", "exclusive_c"]:
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement(f"{name}==1.0"),
req_version=Version("1.0"),
download_url=f"http://example.com/{name}-1.0.tar.gz",
)
return graph
@pytest.fixture
def exclusive_build_manager(self, mock_context):
"""Create a manager with exclusive build settings."""
graph = self.create_exclusive_build_graph()
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = name == "exclusive-c"
return mock_pbi
mock_context.settings.package_build_info = mock_package_build_info
return ParallelBuildManager(mock_context, graph)
def test_exclusive_build_isolation(self, exclusive_build_manager):
"""Test that exclusive build nodes are not mixed with other nodes."""
buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
assert len(buildable_nodes) == 1
assert buildable_nodes[0].canonicalized_name == "exclusive-c"
def test_normal_nodes_built_together_when_no_exclusive(
self, exclusive_build_manager
):
"""Test that normal nodes can be built together when no exclusive nodes are ready."""
exclusive_node = None
for node in exclusive_build_manager._remaining_nodes:
if node.canonicalized_name == "exclusive-c":
exclusive_node = node
break
assert exclusive_node is not None
exclusive_build_manager.mark_node_built(exclusive_node)
buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert len(buildable_nodes) == 2
assert "normal-a" in buildable_names
assert "normal-b" in buildable_names
assert "exclusive-c" not in buildable_names
def test_multiple_exclusive_nodes_only_first_selected(self, mock_context):
"""Test that when multiple exclusive nodes are ready, only the first is selected."""
graph = dependency_graph.DependencyGraph()
for name in ["exclusive_a", "exclusive_b", "normal_c"]:
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement(f"{name}==1.0"),
req_version=Version("1.0"),
download_url=f"http://example.com/{name}-1.0.tar.gz",
)
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = name.startswith("exclusive-")
return mock_pbi
mock_context.settings.package_build_info = mock_package_build_info
manager = ParallelBuildManager(mock_context, graph)
buildable_nodes = manager.get_nodes_ready_to_build()
assert len(buildable_nodes) == 1
assert buildable_nodes[0].canonicalized_name.startswith("exclusive-")
@patch("fromager.commands.build.logger")
def test_exclusive_build_logging(self, mock_logger, exclusive_build_manager):
"""Test that exclusive build scenarios are logged appropriately."""
exclusive_build_manager.get_nodes_ready_to_build()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
exclusive_log_found = any(
"requires exclusive build" in msg for msg in log_calls
)
assert exclusive_log_found
Now let me update the second test method:
from unittest.mock import Mock, patch
import pytest
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from packaging.version import Version
from fromager import context, dependency_graph, requirements_file
from fromager.commands.build import ParallelBuildManager
def create_test_graph():
"""Create a test dependency graph with various dependency scenarios.
Graph structure:
- app_main: Top-level app (depends on lib_shared for install, tool_build for build)
- lib_shared: Shared library (depends on util_base for install, no build deps)
- util_base: Base utility (no dependencies - leaf node)
- tool_build: Build tool (depends on util_base for build and install)
- plugin_extra: Plugin (depends on lib_shared for install, tool_build for build)
- helper_internal: Internal helper (only used by tool_build, not top-level)
Expected build order: util_base → helper_internal → tool_build → lib_shared → (app_main, plugin_extra)
"""
graph = dependency_graph.DependencyGraph()
# Add top-level dependencies (directly requested by user)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("app_main==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/app_main-1.0.tar.gz",
)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.TOP_LEVEL,
req=Requirement("plugin_extra==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/plugin_extra-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("app_main"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_SYSTEM,
req=Requirement("tool_build==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/tool_build-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("app_main"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("lib_shared"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("util_base==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/util_base-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("plugin_extra"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_BACKEND,
req=Requirement("tool_build==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/tool_build-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("plugin_extra"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("lib_shared==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/lib_shared-1.0.tar.gz",
)
# tool_build build dependencies (not top-level, only used by others)
graph.add_dependency(
parent_name=canonicalize_name("tool_build"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.BUILD_SYSTEM,
req=Requirement("util_base==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/util_base-1.0.tar.gz",
)
graph.add_dependency(
parent_name=canonicalize_name("tool_build"),
parent_version=Version("1.0"),
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement("helper_internal==1.0"),
req_version=Version("1.0"),
download_url="http://example.com/helper_internal-1.0.tar.gz",
)
# util_base has no dependencies (leaf node)
# helper_internal has no dependencies (leaf node, not top-level)
return graph
@pytest.fixture
def mock_context():
"""Create a mock WorkContext for testing."""
ctx = Mock(spec=context.WorkContext)
ctx.settings = Mock()
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = False
return mock_pbi
ctx.settings.package_build_info = mock_package_build_info
return ctx
@pytest.fixture
def test_graph():
"""Provide the test dependency graph."""
return create_test_graph()
@pytest.fixture
def build_manager(mock_context, test_graph):
"""Create a ParallelBuildManager instance for testing."""
return ParallelBuildManager(mock_context, test_graph)
class TestParallelBuildManager:
"""Test suite for ParallelBuildManager class."""
def _get_all_buildable_nodes(self, build_manager):
"""Helper to collect all nodes from the generator."""
all_nodes = []
for batch in build_manager.get_nodes_ready_to_build():
all_nodes.extend(batch)
return all_nodes
def test_initialization(self, build_manager, test_graph):
"""Test that the manager initializes correctly."""
assert build_manager.wkctx is not None
assert build_manager.graph == test_graph
assert len(build_manager.built_node_keys) == 0
assert len(build_manager._remaining_nodes) == 6 # 6 non-root nodes
assert build_manager.have_remaining_nodes()
def test_have_remaining_nodes_initially_true(self, build_manager):
"""Test that initially there are remaining nodes."""
assert build_manager.have_remaining_nodes()
def test_have_remaining_nodes_false_when_all_built(self, build_manager, test_graph):
"""Test that have_remaining_nodes returns False when all nodes are built."""
# Mark all nodes as built
for node in test_graph.nodes.values():
if node.key != dependency_graph.ROOT:
build_manager.mark_node_built(node)
assert not build_manager.have_remaining_nodes()
def test_mark_node_built(self, build_manager, test_graph):
"""Test marking nodes as built."""
util_base = test_graph.nodes["util-base==1.0"]
assert not build_manager.is_node_built(util_base)
assert util_base.key not in build_manager.built_node_keys
assert util_base in build_manager._remaining_nodes
build_manager.mark_node_built(util_base)
# Verify node is now marked as built
assert build_manager.is_node_built(util_base)
assert util_base.key in build_manager.built_node_keys
assert util_base not in build_manager._remaining_nodes
def test_nodes_with_no_dependencies_buildable_first(self, build_manager):
"""Test that leaf nodes (no dependencies) are buildable first."""
all_buildable_nodes = self._get_all_buildable_nodes(build_manager)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert (
len(all_buildable_nodes) == 3
) # util-base, helper-internal, and lib-shared (no build deps)
assert "util-base" in buildable_names
assert "helper-internal" in buildable_names
assert "lib-shared" in buildable_names
def test_node_with_build_dependencies(self, build_manager, test_graph):
"""Test that tool_build becomes buildable after its dependencies are built."""
all_buildable_nodes = self._get_all_buildable_nodes(build_manager)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert "util-base" in buildable_names
assert "helper-internal" in buildable_names
assert "tool-build" not in buildable_names
# Mark the ready nodes as built (only nodes returned by get_ready can be marked as done)
util_base = next(node for node in all_buildable_nodes if node.canonicalized_name == "util-base")
helper_internal = next(node for node in all_buildable_nodes if node.canonicalized_name == "helper-internal")
build_manager.mark_node_built(util_base)
build_manager.mark_node_built(helper_internal)
# Get next batch of buildable nodes
all_buildable_nodes = self._get_all_buildable_nodes(build_manager)
buildable_names = {node.canonicalized_name for node in all_buildable_nodes}
assert "tool-build" in buildable_names
def test_transitive_dependencies(self, build_manager, test_graph):
"""Test that transitive dependencies are handled correctly.
app_main depends on tool_build for build, and tool_build depends on helper_internal for install.
So app_main should only be buildable after util_base, helper_internal, and tool_build are built.
"""
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" not in buildable_names
assert (
"helper-internal" in buildable_names
) # helper_internal should still be buildable
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
assert "app-main" not in buildable_names
tool_build = test_graph.nodes["tool-build==1.0"]
build_manager.mark_node_built(tool_build)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "lib-shared" in buildable_names
def test_multiple_build_dependencies(self, build_manager, test_graph):
"""Test that app_main waits for all its dependencies."""
# app_main depends on tool_build for build and lib_shared for install
# tool_build depends on util_base and helper_internal
# lib_shared depends on util_base
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" not in buildable_names
helper_internal = test_graph.nodes["helper-internal==1.0"]
build_manager.mark_node_built(helper_internal)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "tool-build" in buildable_names
tool_build = test_graph.nodes["tool-build==1.0"]
build_manager.mark_node_built(tool_build)
lib_shared = test_graph.nodes["lib-shared==1.0"]
build_manager.mark_node_built(lib_shared)
buildable_nodes = build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert "app-main" in buildable_names
def test_circular_dependency_detection(self, build_manager, test_graph):
"""Test that circular dependencies are detected."""
# Create a scenario where no nodes can be built but some remain
# Mark all nodes as remaining but create impossible conditions
# Remove all leaf nodes from remaining nodes but don't mark them as built
# This simulates a circular dependency scenario where no nodes can be built
build_manager._remaining_nodes = [
node
for node in build_manager._remaining_nodes
if node.canonicalized_name
not in ["util-base", "helper-internal", "lib-shared"]
]
with pytest.raises(ValueError, match="Circular dependency detected"):
build_manager.get_nodes_ready_to_build()
@patch("fromager.commands.build.logger")
def test_logging_messages(self, mock_logger, build_manager, test_graph):
"""Test that appropriate logging messages are generated."""
build_manager.get_nodes_ready_to_build()
mock_logger.info.assert_called()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
assert any("ready to build" in msg for msg in log_calls)
util_base = test_graph.nodes["util-base==1.0"]
build_manager.mark_node_built(util_base)
mock_logger.reset_mock()
build_manager.get_nodes_ready_to_build()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
assert any("waiting for" in msg for msg in log_calls)
class TestExclusiveBuildHandling:
"""Test suite specifically for exclusive build handling."""
def create_exclusive_build_graph(self):
"""Create a graph where some nodes require exclusive builds."""
graph = dependency_graph.DependencyGraph()
for name in ["normal_a", "normal_b", "exclusive_c"]:
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement(f"{name}==1.0"),
req_version=Version("1.0"),
download_url=f"http://example.com/{name}-1.0.tar.gz",
)
return graph
@pytest.fixture
def exclusive_build_manager(self, mock_context):
"""Create a manager with exclusive build settings."""
graph = self.create_exclusive_build_graph()
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = name == "exclusive-c"
return mock_pbi
mock_context.settings.package_build_info = mock_package_build_info
return ParallelBuildManager(mock_context, graph)
def test_exclusive_build_isolation(self, exclusive_build_manager):
"""Test that exclusive build nodes are not mixed with other nodes."""
buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
assert len(buildable_nodes) == 1
assert buildable_nodes[0].canonicalized_name == "exclusive-c"
def test_normal_nodes_built_together_when_no_exclusive(
self, exclusive_build_manager
):
"""Test that normal nodes can be built together when no exclusive nodes are ready."""
exclusive_node = None
for node in exclusive_build_manager._remaining_nodes:
if node.canonicalized_name == "exclusive-c":
exclusive_node = node
break
assert exclusive_node is not None
exclusive_build_manager.mark_node_built(exclusive_node)
buildable_nodes = exclusive_build_manager.get_nodes_ready_to_build()
buildable_names = {node.canonicalized_name for node in buildable_nodes}
assert len(buildable_nodes) == 2
assert "normal-a" in buildable_names
assert "normal-b" in buildable_names
assert "exclusive-c" not in buildable_names
def test_multiple_exclusive_nodes_only_first_selected(self, mock_context):
"""Test that when multiple exclusive nodes are ready, only the first is selected."""
graph = dependency_graph.DependencyGraph()
for name in ["exclusive_a", "exclusive_b", "normal_c"]:
graph.add_dependency(
parent_name=None,
parent_version=None,
req_type=requirements_file.RequirementType.INSTALL,
req=Requirement(f"{name}==1.0"),
req_version=Version("1.0"),
download_url=f"http://example.com/{name}-1.0.tar.gz",
)
def mock_package_build_info(name):
mock_pbi = Mock()
mock_pbi.exclusive_build = name.startswith("exclusive-")
return mock_pbi
mock_context.settings.package_build_info = mock_package_build_info
manager = ParallelBuildManager(mock_context, graph)
buildable_nodes = manager.get_nodes_ready_to_build()
assert len(buildable_nodes) == 1
assert buildable_nodes[0].canonicalized_name.startswith("exclusive-")
@patch("fromager.commands.build.logger")
def test_exclusive_build_logging(self, mock_logger, exclusive_build_manager):
"""Test that exclusive build scenarios are logged appropriately."""
exclusive_build_manager.get_nodes_ready_to_build()
log_calls = [call.args[0] for call in mock_logger.info.call_args_list]
exclusive_log_found = any(
"requires exclusive build" in msg for msg in log_calls
)
assert exclusive_log_found
This is taking too long to fix all tests individually. Let me run a targeted test to see if my approach is working, and then I can fix the remaining tests more efficiently. Let me run just one test to see if the pattern works:
I see the issue! The topological sorter is including the ROOT node (with empty canonicalized_name ''
). I need to filter out the ROOT node from the buildable nodes. Let me fix this in the get_nodes_ready_to_build
method:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
self.install_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_install_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
# Filter out the ROOT node as it's not a real package to build
buildable_nodes = [
node for node in self.build_sorter.get_ready()
if node.key != dependency_graph.ROOT
]
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph because it has
# been built and can be installed when its installation dependencies are
# built.
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
# because they are now completely done and are available for building
# other packages.
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph so
# that packages that depend on them can be built.
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes),
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now let me test this fix:
User
Instead of filtering it out in ParallelBuildManager, change the methods of the DependencyGraph to not include the root node in the first place.
Cursor
You're absolutely right! It's better to fix this at the source in the DependencyGraph
methods rather than filtering it out in the ParallelBuildManager
. Let me update the topological sorter methods to exclude the ROOT node from the beginning.
First, let me revert the filter I just added to the ParallelBuildManager:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
self.install_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_install_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
buildable_nodes = list(self.build_sorter.get_ready())
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the install dependency graph because it has
# been built and can be installed when its installation dependencies are
# built.
self.install_sorter.done(node)
# Get all nodes that are now ready in the install dependency graph
# because they are now completely done and are available for building
# other packages.
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph so
# that packages that depend on them can be built.
for install_node in ready_install_nodes:
self.build_sorter.done(install_node)
logger.debug(
"marked %s as built, marked %d install-ready nodes as done %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes),
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now let me update the DependencyGraph
methods to exclude the ROOT node when adding nodes to the topological sorters:
from __future__ import annotations
import dataclasses
import graphlib
import json
import logging
import pathlib
import typing
from packaging.requirements import Requirement
from packaging.utils import NormalizedName, canonicalize_name
from packaging.version import Version
from .read import open_file_or_url
from .requirements_file import RequirementType
logger = logging.getLogger(__name__)
ROOT = ""
class DependencyEdgeDict(typing.TypedDict):
req_type: str
req: str
key: str
class DependencyNodeDict(typing.TypedDict):
download_url: str
canonicalized_name: str
version: str
pre_built: bool
edges: list[DependencyEdgeDict]
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyNode:
canonicalized_name: NormalizedName
version: Version
download_url: str = dataclasses.field(default="", compare=False)
pre_built: bool = dataclasses.field(default=False, compare=False)
# additional fields
key: str = dataclasses.field(init=False, compare=False, repr=False)
parents: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
children: list[DependencyEdge] = dataclasses.field(
default_factory=list,
init=False,
compare=False,
repr=False,
)
def __post_init__(self) -> None:
if self.canonicalized_name == ROOT:
# root has a special key
object.__setattr__(self, "key", ROOT)
else:
object.__setattr__(
self, "key", f"{self.canonicalized_name}=={self.version}"
)
def add_child(
self,
child: DependencyNode,
req: Requirement,
req_type: RequirementType,
) -> None:
current_to_child_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=child
)
self.children.append(current_to_child_edge)
child_to_current_edge = DependencyEdge(
req=req, req_type=req_type, destination_node=self
)
# will create a cyclic dependency in memory, which will make it harder to garbage collect
# not an issue for fromager since it is used as a short-lived process
child.parents.append(child_to_current_edge)
def to_dict(self) -> DependencyNodeDict:
return {
"download_url": self.download_url,
"pre_built": self.pre_built,
"version": str(self.version),
"canonicalized_name": str(self.canonicalized_name),
"edges": [edge.to_dict() for edge in self.children],
}
def get_incoming_install_edges(self) -> list[DependencyEdge]:
return [
edge for edge in self.parents if edge.req_type == RequirementType.INSTALL
]
def get_outgoing_edges(
self, req_name: str, req_type: RequirementType
) -> list[DependencyEdge]:
return [
edge
for edge in self.children
if canonicalize_name(edge.req.name) == canonicalize_name(req_name)
and edge.req_type == req_type
]
@classmethod
def construct_root_node(cls) -> DependencyNode:
return cls(
canonicalize_name(ROOT),
# version doesn't really matter for root
Version("0"),
)
@dataclasses.dataclass(frozen=True, order=True, slots=True)
class DependencyEdge:
key: str = dataclasses.field(init=False, repr=True, compare=True)
destination_node: DependencyNode = dataclasses.field(repr=False, compare=False)
req: Requirement = dataclasses.field(repr=True, compare=True)
req_type: RequirementType = dataclasses.field(repr=True, compare=True)
def __post_init__(self) -> None:
object.__setattr__(self, "key", self.destination_node.key)
def to_dict(self) -> DependencyEdgeDict:
return {
"key": self.key,
"req_type": str(self.req_type),
"req": str(self.req),
}
class DependencyGraph:
def __init__(self) -> None:
self.nodes: dict[str, DependencyNode] = {}
self.clear()
@classmethod
def from_file(
cls,
graph_file: pathlib.Path | str,
) -> DependencyGraph:
with open_file_or_url(graph_file) as f:
# TODO: add JSON validation to ensure it is a parsable graph json
raw_graph = typing.cast(dict[str, dict], json.load(f))
return cls.from_dict(raw_graph)
@classmethod
def from_dict(
cls,
graph_dict: dict[str, dict[str, typing.Any]],
) -> DependencyGraph:
graph = cls()
stack = [ROOT]
visited = set()
while stack:
curr_key = stack.pop()
if curr_key in visited:
continue
node_dict = typing.cast(DependencyNodeDict, graph_dict[curr_key])
parent_name = parent_version = None
if curr_key != ROOT:
parent_name = canonicalize_name(node_dict["canonicalized_name"])
parent_version = Version(node_dict["version"])
for raw_edge in node_dict["edges"]:
edge_dict = typing.cast(DependencyEdgeDict, raw_edge)
destination_node_dict = typing.cast(
DependencyNodeDict, graph_dict[edge_dict["key"]]
)
graph.add_dependency(
parent_name=parent_name,
parent_version=parent_version,
req_type=RequirementType(edge_dict["req_type"]),
req=Requirement(edge_dict["req"]),
req_version=Version(destination_node_dict["version"]),
download_url=destination_node_dict["download_url"],
pre_built=destination_node_dict["pre_built"],
)
stack.append(edge_dict["key"])
visited.add(curr_key)
return graph
def clear(self) -> None:
self.nodes.clear()
self.nodes[ROOT] = DependencyNode.construct_root_node()
def _to_dict(self):
raw_graph = {}
stack = [self.nodes[ROOT]]
visited = set()
while stack:
node = stack.pop()
raw_graph[node.key] = node.to_dict()
for child in node.children:
if child.destination_node.key not in visited:
stack.append(child.destination_node)
visited.add(node.key)
return raw_graph
def serialize(self, file_handle: typing.TextIO):
raw_graph = self._to_dict()
json.dump(raw_graph, file_handle, indent=2, default=str)
def _add_node(
self,
req_name: NormalizedName,
version: Version,
download_url: str,
pre_built: bool,
):
new_node = DependencyNode(
canonicalized_name=req_name,
version=version,
download_url=download_url,
pre_built=pre_built,
)
# check if a node with that key already exists. if it does then use that
node = self.nodes.get(new_node.key, new_node)
# store the node in case we are using new_node
self.nodes[node.key] = node
return node
def add_dependency(
self,
parent_name: NormalizedName | None,
parent_version: Version | None,
req_type: RequirementType,
req: Requirement,
req_version: Version,
download_url: str = "",
pre_built: bool = False,
) -> None:
logger.debug(
"recording %s dependency %s%s -> %s==%s",
req_type,
parent_name if parent_name else f"({RequirementType.TOP_LEVEL})",
f"=={parent_version}" if parent_version else "",
req.name,
req_version,
)
node = self._add_node(
req_name=canonicalize_name(req.name),
version=req_version,
download_url=download_url,
pre_built=pre_built,
)
parent_key = ROOT if parent_name is None else f"{parent_name}=={parent_version}"
if parent_key not in self.nodes:
raise ValueError(
f"Trying to add {node.key} to parent {parent_key} but {parent_key} does not exist"
)
self.nodes[parent_key].add_child(node, req=req, req_type=req_type)
def get_dependency_edges(
self, match_dep_types: list[RequirementType] | None = None
) -> typing.Iterable[DependencyEdge]:
visited = set()
for edge in self._depth_first_traversal(
self.nodes[ROOT].children,
set(),
match_dep_types=match_dep_types,
):
if edge.destination_node.key not in visited:
yield edge
visited.add(edge.destination_node.key)
def get_install_dependencies(self) -> typing.Iterable[DependencyNode]:
for edge in self.get_dependency_edges(
match_dep_types=[RequirementType.INSTALL, RequirementType.TOP_LEVEL]
):
yield edge.destination_node
def get_nodes_by_name(self, req_name: str | None) -> list[DependencyNode]:
if not req_name:
return [self.nodes[ROOT]]
return [
node
for node in self.get_all_nodes()
if node.canonicalized_name == canonicalize_name(req_name)
]
def get_root_node(self) -> DependencyNode:
return self.nodes[ROOT]
def get_all_nodes(self) -> typing.Iterable[DependencyNode]:
return self.nodes.values()
def get_install_dependency_versions(
self,
) -> dict[NormalizedName, list[DependencyNode]]:
all_versions: dict[NormalizedName, list[DependencyNode]] = {}
for node in self.get_install_dependencies():
all_versions.setdefault(node.canonicalized_name, []).append(node)
return all_versions
def _depth_first_traversal(
self,
start_edges: list[DependencyEdge],
visited: set[str],
match_dep_types: list[RequirementType] | None = None,
) -> typing.Iterable[DependencyEdge]:
for edge in start_edges:
if edge.destination_node.key in visited:
continue
if match_dep_types and edge.req_type not in match_dep_types:
continue
visited.add(edge.destination_node.key)
yield edge
yield from self._depth_first_traversal(
edge.destination_node.children, visited, match_dep_types
)
def _get_dependency_topological_sorter(
self,
requirement_filter: typing.Callable[[RequirementType], bool],
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for dependencies matching the filter.
Args:
requirement_filter: Function that returns True for requirement types to include
Returns:
A TopologicalSorter with all DependencyNode objects and filtered dependency relationships
"""
sorter: graphlib.TopologicalSorter[DependencyNode] = (
graphlib.TopologicalSorter()
)
# Add all nodes (except ROOT) and filtered dependency relationships
for node in self.get_all_nodes():
# Skip the ROOT node as it's not a real package to build
if node.key != ROOT:
sorter.add(node)
for edge in node.children:
if requirement_filter(edge.req_type):
# In topological sorting, if A depends on B, then B must come before A
# So we add (dependent=A, dependency=B)
# Skip self-dependencies to avoid cycles and skip ROOT node
if (node != edge.destination_node and
node.key != ROOT and
edge.destination_node.key != ROOT):
sorter.add(node, edge.destination_node)
sorter.prepare()
return sorter
def get_build_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the build dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing build-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent build dependencies (build-system, build-backend, build-sdist).
"""
return self._get_dependency_topological_sorter(
lambda req_type: req_type.is_build_requirement
)
def get_install_dependency_topological_sorter(
self,
) -> graphlib.TopologicalSorter[DependencyNode]:
"""Return a TopologicalSorter for the install dependencies in the graph.
The sorter contains DependencyNode objects and edges
representing install-time dependencies between packages.
Returns:
A TopologicalSorter where each node is a DependencyNode and edges
represent install dependencies (install, toplevel).
"""
return self._get_dependency_topological_sorter(
lambda req_type: req_type.is_install_requirement
)
Now let me test this fix by running the single test again:
Excellent! That test is now passing. Let me run the second test I fixed to see if it works too:
I see the issue. The problem is that the install_sorter
still includes the ROOT node and its dependencies, so when I try to call done()
on a node that was returned by the build_sorter.get_ready()
, it might not have been returned by install_sorter.get_ready()
yet.
The issue is in the mark_node_built
method - I need to make sure I only call done()
on nodes that were actually returned by get_ready()
from the respective sorters. Let me fix this by updating the logic:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
self.install_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_install_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
buildable_nodes = list(self.build_sorter.get_ready())
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update both topological sorters."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the build dependency graph (it was returned by get_ready)
self.build_sorter.done(node)
# Check if this node is ready in the install sorter and mark it done there too
ready_install_nodes = list(self.install_sorter.get_ready())
if node in ready_install_nodes:
self.install_sorter.done(node)
# Get the updated list of ready install nodes after marking this one done
ready_install_nodes = list(self.install_sorter.get_ready())
# Mark all ready install nodes as done in the build dependency graph so
# that packages that depend on them can be built.
for install_node in ready_install_nodes:
# Only mark as done if it's not already done in build sorter
try:
self.build_sorter.done(install_node)
except ValueError:
# Node was already marked as done or not ready yet
pass
logger.debug(
"marked %s as built, marked %d install-ready nodes as done %s",
node.key,
len(ready_install_nodes),
sorted(n.key for n in ready_install_nodes),
)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now let me test this fix:
User
No, stop. That logic is not correct. We should not mark a node as done in the build_sorter until all of its installation dependencies are built. If we cannot call done() on a node in a sorter unless it has been returned by get_ready() then we need to track the installation dependencies some other way, not with a sorter.
Cursor
You're absolutely right! I was overcomplicating this. The issue is that we're trying to use two topological sorters in a way that doesn't match how they're designed to work.
The correct approach is:
- Use the build sorter to determine when nodes are ready to build (based on build dependencies)
- Use a different mechanism to track install dependencies, not another topological sorter
Let me revert the mark_node_built
method and implement a simpler approach that tracks install dependencies without using a second topological sorter:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorters for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
self.install_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_install_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
buildable_nodes = list(self.build_sorter.get_ready())
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update the build sorter."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the build dependency graph (it was returned by get_ready)
self.build_sorter.done(node)
# Check if any nodes are now ready to build because their install dependencies are satisfied
# We need to mark nodes as done in the build sorter when all their install dependencies are built
self._update_build_sorter_for_install_dependencies()
logger.debug("marked %s as built", node.key)
def _update_build_sorter_for_install_dependencies(self) -> None:
"""Mark nodes as done in build sorter when their install dependencies are satisfied."""
# Get nodes that are ready in the build sorter but might be waiting for install deps
ready_build_nodes = list(self.build_sorter.get_ready())
for node in self.graph.get_all_nodes():
if node.key == dependency_graph.ROOT or self.is_node_built(node):
continue
# Check if all install dependencies of this node are built
install_deps_satisfied = True
for edge in node.children:
if edge.req_type.is_install_requirement:
if not self.is_node_built(edge.destination_node):
install_deps_satisfied = False
break
# If all install deps are satisfied and node is ready in build sorter,
# we can mark it as available for building by marking it done if it's ready
if install_deps_satisfied and node in ready_build_nodes:
# This node can now be built - it's already ready in build sorter
pass # No action needed, it's already in ready_build_nodes
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Actually, let me step back and think about this more clearly. The original requirement was:
"Nodes become ready to build when the build dependency graph says they are. When nodes are built, they should be marked done in the install dependency graph and then all of the ready nodes of the install dependency graph should be marked as done in the build dependency graph."
But you're right - this creates a circular dependency between the two sorters that doesn't work with the TopologicalSorter
API constraints.
Let me simplify this to use only the build dependency sorter and handle install dependencies through a different mechanism. Let me remove the install sorter entirely and implement a simpler approach:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorter for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
buildable_nodes = list(self.build_sorter.get_ready())
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update the build sorter."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the build dependency graph (it was returned by get_ready)
self.build_sorter.done(node)
# Check if any nodes are now ready to build because their install dependencies are satisfied
# We need to mark nodes as done in the build sorter when all their install dependencies are built
self._update_build_sorter_for_install_dependencies()
logger.debug("marked %s as built", node.key)
def _update_build_sorter_for_install_dependencies(self) -> None:
"""Mark nodes as done in build sorter when their install dependencies are satisfied."""
# Get nodes that are ready in the build sorter but might be waiting for install deps
ready_build_nodes = list(self.build_sorter.get_ready())
for node in self.graph.get_all_nodes():
if node.key == dependency_graph.ROOT or self.is_node_built(node):
continue
# Check if all install dependencies of this node are built
install_deps_satisfied = True
for edge in node.children:
if edge.req_type.is_install_requirement:
if not self.is_node_built(edge.destination_node):
install_deps_satisfied = False
break
# If all install deps are satisfied and node is ready in build sorter,
# we can mark it as available for building by marking it done if it's ready
if install_deps_satisfied and node in ready_build_nodes:
# This node can now be built - it's already ready in build sorter
pass # No action needed, it's already in ready_build_nodes
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now let me simplify the mark_node_built
method to just use the build sorter and manually check install dependencies:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorter for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
buildable_nodes = list(self.build_sorter.get_ready())
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update the build sorter."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the build dependency graph (it was returned by get_ready)
self.build_sorter.done(node)
logger.debug("marked %s as built", node.key)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now I need to update the get_nodes_ready_to_build
method to filter out nodes that don't have their install dependencies satisfied yet:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorter for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
ready_from_build_sorter = list(self.build_sorter.get_ready())
# Filter to only include nodes whose install dependencies are also satisfied
buildable_nodes = []
for node in ready_from_build_sorter:
# Check if all install dependencies of this node are built
install_deps_satisfied = True
for edge in node.children:
if edge.req_type.is_install_requirement:
if not self.is_node_built(edge.destination_node):
install_deps_satisfied = False
break
if install_deps_satisfied:
buildable_nodes.append(node)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and update the build sorter."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Mark the node as done in the build dependency graph (it was returned by get_ready)
self.build_sorter.done(node)
logger.debug("marked %s as built", node.key)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now let me test this simplified approach:
User
Stop, no, that's wrong. mark_node_built() should remember that a node is built but should only mark it as done() when the installation dependencies of the node have also all been built. The method is still immediately calling done() on the build_sorter
Cursor
You're absolutely right! I'm still calling done()
immediately in mark_node_built()
. The correct logic should be:
- Remember that a node is built (add to
built_node_keys
) - Only call
done()
on the build sorter when ALL of that node's install dependencies are also built - Check other nodes to see if they can now be marked as done because their install dependencies are satisfied
Let me fix this:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorter for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
ready_from_build_sorter = list(self.build_sorter.get_ready())
# Filter to only include nodes whose install dependencies are also satisfied
buildable_nodes = []
for node in ready_from_build_sorter:
# Check if all install dependencies of this node are built
install_deps_satisfied = True
for edge in node.children:
if edge.req_type.is_install_requirement:
if not self.is_node_built(edge.destination_node):
install_deps_satisfied = False
break
if install_deps_satisfied:
buildable_nodes.append(node)
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and check if any nodes can now be marked done in build sorter."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Check if this node can be marked as done in the build sorter
# (only if all its install dependencies are also built)
self._check_and_mark_done_in_build_sorter(node)
# Check if any other nodes can now be marked as done because this node was built
self._update_build_sorter_done_status()
logger.debug("marked %s as built", node.key)
def _check_and_mark_done_in_build_sorter(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as done in build sorter if all its install dependencies are built."""
# Check if all install dependencies of this node are built
for edge in node.children:
if edge.req_type.is_install_requirement:
if not self.is_node_built(edge.destination_node):
# Not all install dependencies are built yet
return
# All install dependencies are built, mark as done in build sorter
try:
self.build_sorter.done(node)
logger.debug("marked %s as done in build sorter (install deps satisfied)", node.key)
except ValueError:
# Node was not ready or already done
pass
def _update_build_sorter_done_status(self) -> None:
"""Check all built nodes to see if any can now be marked done in build sorter."""
for node in self.graph.get_all_nodes():
if node.key == dependency_graph.ROOT:
continue
if self.is_node_built(node):
self._check_and_mark_done_in_build_sorter(node)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now I can also remove the install dependency filtering from get_nodes_ready_to_build
since the build sorter will only return nodes that are truly ready (i.e., their install dependencies have been marked as done):
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorter for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
# These nodes have their build dependencies satisfied AND their install dependencies satisfied
# (because we only mark nodes as done in the build sorter when install deps are satisfied)
buildable_nodes = list(self.build_sorter.get_ready())
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built and check if any nodes can now be marked done in build sorter."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
# Check if this node can be marked as done in the build sorter
# (only if all its install dependencies are also built)
self._check_and_mark_done_in_build_sorter(node)
# Check if any other nodes can now be marked as done because this node was built
self._update_build_sorter_done_status()
logger.debug("marked %s as built", node.key)
def _check_and_mark_done_in_build_sorter(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as done in build sorter if all its install dependencies are built."""
# Check if all install dependencies of this node are built
for edge in node.children:
if edge.req_type.is_install_requirement:
if not self.is_node_built(edge.destination_node):
# Not all install dependencies are built yet
return
# All install dependencies are built, mark as done in build sorter
try:
self.build_sorter.done(node)
logger.debug("marked %s as done in build sorter (install deps satisfied)", node.key)
except ValueError:
# Node was not ready or already done
pass
def _update_build_sorter_done_status(self) -> None:
"""Check all built nodes to see if any can now be marked done in build sorter."""
for node in self.graph.get_all_nodes():
if node.key == dependency_graph.ROOT:
continue
if self.is_node_built(node):
self._check_and_mark_done_in_build_sorter(node)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Perfect! Now the logic is correct:
mark_node_built()
remembers that a node is built but only callsdone()
on the build sorter when all install dependencies are satisfiedget_nodes_ready_to_build()
simply gets nodes from the build sorter, which will only return nodes that are truly ready (both build and install dependencies satisfied)- The build sorter manages the proper ordering based on build dependencies, and we use the
done()
mechanism correctly by only calling it when install dependencies are also satisfied
Let me test this approach:
User
Instead of updating the sorter in mark_node_built, update it at the start of get_nodes_ready_to_build, before getting more ready nodes. This will be more efficient because it will iterate over the built nodes less often.
Cursor
Excellent point! That's much more efficient. Instead of updating the sorter every time a node is built, I'll update it once at the beginning of get_nodes_ready_to_build()
before getting the ready nodes. This way we only iterate over built nodes when we actually need to check for ready nodes.
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorter for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Get nodes that are ready to build from the build dependency sorter
# These nodes have their build dependencies satisfied AND their install dependencies satisfied
# (because we only mark nodes as done in the build sorter when install deps are satisfied)
buildable_nodes = list(self.build_sorter.get_ready())
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
logger.debug("marked %s as built", node.key)
def _check_and_mark_done_in_build_sorter(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as done in build sorter if all its install dependencies are built."""
# Check if all install dependencies of this node are built
for edge in node.children:
if edge.req_type.is_install_requirement:
if not self.is_node_built(edge.destination_node):
# Not all install dependencies are built yet
return
# All install dependencies are built, mark as done in build sorter
try:
self.build_sorter.done(node)
logger.debug("marked %s as done in build sorter (install deps satisfied)", node.key)
except ValueError:
# Node was not ready or already done
pass
def _update_build_sorter_done_status(self) -> None:
"""Check all built nodes to see if any can now be marked done in build sorter."""
for node in self.graph.get_all_nodes():
if node.key == dependency_graph.ROOT:
continue
if self.is_node_built(node):
self._check_and_mark_done_in_build_sorter(node)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.option(
"-m",
"--max-workers",
type=int,
default=None,
help="maximum number of parallel workers to run (default: unlimited)",
)
@click.argument("graph_file")
@click.pass_obj
def build_parallel(
wkctx: context.WorkContext,
graph_file: str,
force: bool,
cache_wheel_server_url: str | None,
max_workers: int | None,
) -> None:
"""Build wheels in parallel based on a dependency graph
GRAPH_FILE is a graph.json file containing the dependency relationships between packages
Performs parallel builds of wheels based on their dependency relationships.
Packages that have no dependencies or whose dependencies are already built
can be built concurrently. By default, all possible packages are built in
parallel. Use --max-workers to limit the number of concurrent builds.
"""
wkctx.enable_parallel_builds()
server.start_wheel_server(wkctx)
wheel_server_urls: list[str] = [wkctx.wheel_server_url]
if cache_wheel_server_url:
# put after local server so we always check local server first
wheel_server_urls.append(cache_wheel_server_url)
if force:
logger.info(f"rebuilding all wheels even if they exist in {wheel_server_urls}")
else:
logger.info(
f"skipping builds for versions of packages available at {wheel_server_urls}"
)
# Load the dependency graph
logger.info("reading dependency graph from %s", graph_file)
graph: dependency_graph.DependencyGraph
graph = dependency_graph.DependencyGraph.from_file(graph_file)
# Initialize the parallel build manager
build_manager = ParallelBuildManager(wkctx, graph)
# Get total count for progress tracking
total_nodes = len(
[n for n in graph.nodes.values() if n.key != dependency_graph.ROOT]
)
logger.info("found %d packages to build", total_nodes)
# A node can be built when all of its build dependencies are built
entries: list[BuildSequenceEntry] = []
with progress.progress_context(total=total_nodes) as progressbar:
def update_progressbar_cb(
future: concurrent.futures.Future[BuildSequenceEntry],
) -> None:
"""Immediately update the progress when when a task is done"""
progressbar.update()
while build_manager.have_remaining_nodes():
# Get batches of nodes that are ready to be built, organized by exclusivity
for buildable_nodes in build_manager.get_nodes_ready_to_build():
# Build up to max_workers nodes concurrently (or all if max_workers is None)
with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
futures: list[concurrent.futures.Future[BuildSequenceEntry]] = []
reqs: list[Requirement] = []
logger.info(
"starting to build batch: %s",
sorted(n.key for n in buildable_nodes),
)
for node in buildable_nodes:
req = Requirement(f"{node.canonicalized_name}=={node.version}")
reqs.append(req)
future = executor.submit(
_build_parallel,
wkctx=wkctx,
resolved_version=node.version,
req=req,
source_download_url=node.download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
future.add_done_callback(update_progressbar_cb)
futures.append(future)
# Wait for all builds to complete
for node, future in zip(buildable_nodes, futures, strict=True):
try:
entry = future.result()
entries.append(entry)
build_manager.mark_node_built(node)
# progress bar is updated in callback
except Exception as e:
logger.error(f"Failed to build {node.key}: {e}")
raise
metrics.summarize(wkctx, "Building in parallel")
_summary(wkctx, entries)
build_parallel._fromager_show_build_settings = True # type: ignore
Now let me update get_nodes_ready_to_build()
to call the update method at the beginning:
import collections.abc
import concurrent.futures
import dataclasses
import datetime
import graphlib
import json
import logging
import pathlib
import sys
import threading
import typing
from urllib.parse import urlparse
import click
import rich
import rich.box
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name, parse_wheel_filename
from packaging.version import Version
from rich.table import Table
from rich.text import Text
from fromager import (
build_environment,
clickext,
context,
dependency_graph,
hooks,
metrics,
overrides,
progress,
read,
server,
sources,
wheels,
)
from .. import resolver
from ..log import VERBOSE_LOG_FMT, ThreadLogFilter, req_ctxvar_context
logger = logging.getLogger(__name__)
DependencyNodeList = list[dependency_graph.DependencyNode]
@dataclasses.dataclass(order=True, frozen=True)
class BuildSequenceEntry:
# compare, hash, and sort by name and version
name: str
version: Version
prebuilt: bool = dataclasses.field(compare=False)
download_url: str = dataclasses.field(compare=False)
wheel_filename: pathlib.Path = dataclasses.field(compare=False)
skipped: bool = dataclasses.field(default=False, compare=False)
@staticmethod
def dict_factory(x):
return {
k: str(v) if isinstance(v, pathlib.Path | Version) else v for (k, v) in x
}
@click.command()
@click.option(
"--wheel-server-url",
default="",
type=str,
help="URL for the wheel server for builds",
)
@click.argument("dist_name")
@click.argument("dist_version", type=clickext.PackageVersion())
@click.argument("sdist_server_url")
@click.pass_obj
def build(
wkctx: context.WorkContext,
wheel_server_url: str,
dist_name: str,
dist_version: Version,
sdist_server_url: str,
) -> None:
"""Build a single version of a single wheel
DIST_NAME is the name of a distribution
DIST_VERSION is the version to process
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
1. Downloads the source distribution.
2. Unpacks it and prepares the source via patching, vendoring rust
dependencies, etc.
3. Prepares a build environment with the build dependencies.
4. Builds the wheel.
Refer to the 'step' commands for scripting these stages
separately.
"""
wkctx.wheel_server_url = wheel_server_url
server.start_wheel_server(wkctx)
req = Requirement(f"{dist_name}=={dist_version}")
with req_ctxvar_context(req, dist_version):
# We have to resolve the source here to get a
# source_url. Other build modes use data computed from a
# bootstrap job where that URL is saved in the build
# instruction file passed to build-sequence or build-parallel.
source_url, version = sources.resolve_source(
ctx=wkctx,
req=req,
sdist_server_url=sdist_server_url,
)
entry = _build(
wkctx=wkctx,
resolved_version=version,
req=req,
source_download_url=source_url,
force=True,
cache_wheel_server_url=None,
)
print(entry.wheel_filename)
build._fromager_show_build_settings = True # type: ignore
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels even if they have already been built",
)
@click.option(
"-c",
"--cache-wheel-server-url",
"cache_wheel_server_url",
help="url to a wheel server from where fromager can check if it had already built the wheel",
)
@click.argument("build_order_file")
@click.pass_obj
def build_sequence(
wkctx: context.WorkContext,
build_order_file: str,
force: bool,
cache_wheel_server_url: str | None,
) -> None:
"""Build a sequence of wheels in order
BUILD_ORDER_FILE is the build-order.json files to build
SDIST_SERVER_URL is the URL for a PyPI-compatible package index hosting sdists
Performs the equivalent of the 'build' command for each item in
the build order file.
"""
server.start_wheel_server(wkctx)
if force:
logger.info(
"rebuilding all wheels even if they exist in "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
else:
logger.info(
"skipping builds for versions of packages available at "
f"{wkctx.wheel_server_url=}, {cache_wheel_server_url=}"
)
entries: list[BuildSequenceEntry] = []
logger.info("reading build order from %s", build_order_file)
with read.open_file_or_url(build_order_file) as f:
for entry in progress.progress(json.load(f)):
dist_name = entry["dist"]
resolved_version = Version(entry["version"])
source_download_url = entry["source_url"]
# If we are building from git, use the requirement as specified so
# we include the URL. Otherwise, create a fake requirement with the
# name and version so we are explicitly building the expected
# version.
if entry["source_url_type"] == "git":
req = Requirement(entry["req"])
else:
req = Requirement(f"{dist_name}=={resolved_version}")
with req_ctxvar_context(req, resolved_version):
logger.info("building %s", resolved_version)
entry = _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
if entry.prebuilt:
logger.info(
"downloaded prebuilt wheel %s", entry.wheel_filename.name
)
elif entry.skipped:
logger.info(
"skipping building wheel since %s already exists",
entry.wheel_filename.name,
)
else:
logger.info("built %s", entry.wheel_filename.name)
entries.append(entry)
metrics.summarize(wkctx, "Building")
_summary(wkctx, entries)
build_sequence._fromager_show_build_settings = True # type: ignore
def _summary(ctx: context.WorkContext, entries: list[BuildSequenceEntry]) -> None:
output: list[typing.Any] = []
now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
output.append(Text(f"Build sequence summary {now}\n"))
built_entries = [e for e in entries if not e.skipped and not e.prebuilt]
if built_entries:
output.append(
_create_table(
built_entries,
title="New builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No new builds\n"))
prebuilt_entries = [e for e in entries if e.prebuilt]
if prebuilt_entries:
output.append(
_create_table(
prebuilt_entries,
title="Prebuilt wheels",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No pre-built wheels\n"))
skipped_entries = [e for e in entries if e.skipped and not e.prebuilt]
if skipped_entries:
output.append(
_create_table(
skipped_entries,
title="Skipped existing builds",
box=rich.box.MARKDOWN,
title_justify="left",
)
)
else:
output.append(Text("No skipped builds\n"))
console = rich.get_console()
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.md", "w", encoding="utf-8") as f:
console = rich.console.Console(file=f, width=sys.maxsize)
console.print(*output, sep="\n\n")
with open(ctx.work_dir / "build-sequence-summary.json", "w", encoding="utf-8") as f:
json.dump(
[
dataclasses.asdict(e, dict_factory=BuildSequenceEntry.dict_factory)
for e in entries
],
f,
)
def _create_table(entries: list[BuildSequenceEntry], **table_kwargs) -> Table:
table = Table(**table_kwargs)
table.add_column("Name", justify="right", no_wrap=True)
table.add_column("Version", no_wrap=True)
table.add_column("Wheel", no_wrap=True)
table.add_column("Source URL")
platlib_count = 0
for info in sorted(entries):
tags = parse_wheel_filename(info.wheel_filename.name)[3]
if any(t.platform != "any" or t.abi != "none" for t in tags):
platlib_count += 1
source_filename = urlparse(info.download_url).path.rsplit("/", 1)[-1]
table.add_row(
info.name,
str(info.version),
info.wheel_filename.name,
# escape Rich markup
rf"\[{source_filename}]({info.download_url})",
)
# summary
table.add_section()
table.add_row(
f"total: {len(entries)}",
None,
f"platlib: {platlib_count}",
None,
)
return table
def _build(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""Handle one version of one wheel.
Either:
1. Reuse an existing wheel we have locally.
2. Download a pre-built wheel.
3. Build the wheel from source.
"""
wheel_filename: pathlib.Path | None = None
use_exiting_wheel: bool = False
# Set up a log file for all of the details of the build for this one wheel.
# We attach a handler to the root logger so that all messages are logged to
# the file, and we add a filter to the handler so that only messages from
# the current thread are logged for when we build in parallel.
root_logger = logging.getLogger(None)
module_name = overrides.pkgname_to_override_module(req.name)
wheel_log = wkctx.logs_dir / f"{module_name}-{resolved_version}.log"
file_handler = logging.FileHandler(filename=str(wheel_log))
file_handler.setFormatter(logging.Formatter(VERBOSE_LOG_FMT))
file_handler.addFilter(ThreadLogFilter(threading.current_thread().name))
root_logger.addHandler(file_handler)
logger.info("starting processing")
pbi = wkctx.package_build_info(req)
prebuilt = pbi.pre_built
wheel_server_urls = wheels.get_wheel_server_urls(
wkctx, req, cache_wheel_server_url=cache_wheel_server_url
)
# See if we can reuse an existing wheel.
if not force:
wheel_filename = _is_wheel_built(
wkctx,
req.name,
resolved_version,
wheel_server_urls,
)
if wheel_filename:
logger.info("using existing wheel from %s", wheel_filename)
use_exiting_wheel = True
# Handle prebuilt wheels.
if prebuilt:
if not wheel_filename:
logger.info("downloading prebuilt wheel")
wheel_filename = wheels.download_wheel(
req=req,
wheel_url=source_download_url,
output_directory=wkctx.wheels_build,
)
else:
# already downloaded prebuilt wheel
use_exiting_wheel = True
# Run hooks for prebuilt wheels. At this point wheel_filename should
# be set either from _is_wheel_built() or download_wheel().
hooks.run_prebuilt_wheel_hooks(
ctx=wkctx,
req=req,
dist_name=req.name,
dist_version=str(resolved_version),
wheel_filename=wheel_filename,
)
# If we get here and still don't have a wheel filename, then we need to
# build the wheel.
if not wheel_filename:
source_filename = sources.download_source(
ctx=wkctx,
req=req,
version=resolved_version,
download_url=source_download_url,
)
logger.debug(
"saved sdist of version %s from %s to %s",
resolved_version,
source_download_url,
source_filename,
)
# Prepare source
source_root_dir = sources.prepare_source(
ctx=wkctx,
req=req,
source_filename=source_filename,
version=resolved_version,
)
# Build environment
build_env = build_environment.prepare_build_environment(
ctx=wkctx, req=req, sdist_root_dir=source_root_dir
)
# Make a new source distribution, in case we patched the code.
sdist_filename = sources.build_sdist(
ctx=wkctx,
req=req,
version=resolved_version,
sdist_root_dir=source_root_dir,
build_env=build_env,
)
# Build
wheel_filename = wheels.build_wheel(
ctx=wkctx,
req=req,
sdist_root_dir=source_root_dir,
version=resolved_version,
build_env=build_env,
)
hooks.run_post_build_hooks(
ctx=wkctx,
req=req,
dist_name=canonicalize_name(req.name),
dist_version=str(resolved_version),
sdist_filename=sdist_filename,
wheel_filename=wheel_filename,
)
wkctx.clean_build_dirs(source_root_dir, build_env)
root_logger.removeHandler(file_handler)
file_handler.close()
server.update_wheel_mirror(wkctx)
# After we update the wheel mirror, the built file has
# moved to a new directory.
wheel_filename = wkctx.wheels_downloads / wheel_filename.name
return BuildSequenceEntry(
name=canonicalize_name(req.name),
version=resolved_version,
prebuilt=prebuilt,
download_url=source_download_url,
wheel_filename=wheel_filename,
skipped=use_exiting_wheel,
)
def _is_wheel_built(
wkctx: context.WorkContext,
dist_name: str,
resolved_version: Version,
wheel_server_urls: list[str],
) -> pathlib.Path | None:
req = Requirement(f"{dist_name}=={resolved_version}")
try:
logger.info(
"checking if a suitable wheel for %s was already built on %s",
req,
wheel_server_urls,
)
url, _ = wheels.resolve_prebuilt_wheel(
ctx=wkctx,
req=req,
wheel_server_urls=wheel_server_urls,
)
logger.info("found candidate wheel %s", url)
pbi = wkctx.package_build_info(req)
build_tag_from_settings = pbi.build_tag(resolved_version)
build_tag = build_tag_from_settings if build_tag_from_settings else (0, "")
wheel_basename = resolver.extract_filename_from_url(url)
_, _, build_tag_from_name, _ = parse_wheel_filename(wheel_basename)
existing_build_tag = build_tag_from_name if build_tag_from_name else (0, "")
if (
existing_build_tag[0] > build_tag[0]
and existing_build_tag[1] == build_tag[1]
):
raise ValueError(
f"{dist_name}: changelog for version {resolved_version} is inconsistent. Found build tag {existing_build_tag} but expected {build_tag}"
)
if existing_build_tag != build_tag:
logger.info(
f"candidate wheel build tag {existing_build_tag} does not match expected build tag {build_tag}"
)
return None
wheel_filename: pathlib.Path | None = None
if url.startswith(wkctx.wheel_server_url):
logging.debug("found wheel on local server")
wheel_filename = wkctx.wheels_downloads / wheel_basename
if not wheel_filename.exists():
logger.info("wheel not found in local cache, preparing to download")
wheel_filename = None
if not wheel_filename:
# if the found wheel was on an external server, then download it
logger.info("downloading wheel from %s", url)
wheel_filename = wheels.download_wheel(req, url, wkctx.wheels_downloads)
return wheel_filename
except Exception:
logger.debug(
"could not locate prebuilt wheel %s-%s on %s",
dist_name,
resolved_version,
wheel_server_urls,
exc_info=True,
)
logger.info("could not locate prebuilt wheel")
return None
def _build_parallel(
wkctx: context.WorkContext,
resolved_version: Version,
req: Requirement,
source_download_url: str,
force: bool,
cache_wheel_server_url: str | None,
) -> BuildSequenceEntry:
"""
This function runs in a thread to manage the build of a single package.
"""
with req_ctxvar_context(req, resolved_version):
return _build(
wkctx=wkctx,
resolved_version=resolved_version,
req=req,
source_download_url=source_download_url,
force=force,
cache_wheel_server_url=cache_wheel_server_url,
)
class ParallelBuildManager:
"""Manages the logic for determining which nodes can be built in parallel."""
def __init__(
self, wkctx: context.WorkContext, graph: dependency_graph.DependencyGraph
):
self.wkctx = wkctx
self.graph = graph
self.built_node_keys: set[str] = set()
# Cache remaining nodes for efficiency - initialize with all non-root nodes
self._remaining_nodes: DependencyNodeList = [
node for node in graph.nodes.values() if node.key != dependency_graph.ROOT
]
# Initialize topological sorter for managing build order
self.build_sorter: graphlib.TopologicalSorter[
dependency_graph.DependencyNode
] = graph.get_build_dependency_topological_sorter()
def get_nodes_ready_to_build(
self,
) -> collections.abc.Generator[DependencyNodeList, None, None]:
"""Generator that yields lists of nodes ready to be built, organized by exclusivity.
Each exclusive build node is yielded in its own list.
All non-exclusive nodes are yielded together in a single list.
"""
# Update the build sorter to mark nodes as done when their install dependencies are satisfied
self._update_build_sorter_done_status()
# Get nodes that are ready to build from the build dependency sorter
# These nodes have their build dependencies satisfied AND their install dependencies satisfied
# (because we only mark nodes as done in the build sorter when install deps are satisfied)
buildable_nodes = list(self.build_sorter.get_ready())
if not buildable_nodes:
# If we can't build anything but still have nodes, we have a cycle
remaining: list[str] = [n.key for n in self._remaining_nodes]
logger.info("have already built: %s", sorted(self.built_node_keys))
raise ValueError(f"Circular dependency detected among: {remaining}")
logger.info(
"ready to build: %s",
sorted(n.key for n in buildable_nodes),
)
# Process nodes and yield exclusive ones immediately
non_exclusive_nodes: DependencyNodeList = []
for node in buildable_nodes:
if self.wkctx.settings.package_build_info(
node.canonicalized_name
).exclusive_build:
logger.info(f"{node.canonicalized_name}: requires exclusive build")
yield [node]
else:
non_exclusive_nodes.append(node)
# Yield all non-exclusive nodes together if any exist
if non_exclusive_nodes:
logger.info(
"ready to build: %s",
sorted(n.key for n in non_exclusive_nodes),
)
yield non_exclusive_nodes
def mark_node_built(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as built."""
self.built_node_keys.add(node.key)
# Remove from remaining nodes cache for efficiency
if node in self._remaining_nodes:
self._remaining_nodes.remove(node)
logger.debug("marked %s as built", node.key)
def _check_and_mark_done_in_build_sorter(self, node: dependency_graph.DependencyNode) -> None:
"""Mark a node as done in build sorter if all its install dependencies are built."""
# Check if all install dependencies of this node are built
for edge in node.children:
if edge.req_type.is_install_requirement:
if not self.is_node_built(edge.destination_node):
# Not all install dependencies are built yet
return
# All install dependencies are built, mark as done in build sorter
try:
self.build_sorter.done(node)
logger.debug("marked %s as done in build sorter (install deps satisfied)", node.key)
except ValueError:
# Node was not ready or already done
pass
def _update_build_sorter_done_status(self) -> None:
"""Check all built nodes to see if any can now be marked done in build sorter."""
for node in self.graph.get_all_nodes():
if node.key == dependency_graph.ROOT:
continue
if self.is_node_built(node):
self._check_and_mark_done_in_build_sorter(node)
def have_remaining_nodes(self) -> bool:
"""Check if there are any nodes left to build."""
return bool(self._remaining_nodes)
def get_built_nodes(self) -> DependencyNodeList:
"""Get all nodes that have been built."""
return [
node
for node in self.graph.nodes.values()
if node.key in self.built_node_keys
]
def is_node_built(self, node: dependency_graph.DependencyNode) -> bool:
"""Check if a specific node has been built."""
return node.key in self.built_node_keys
@click.command()
@click.option(
"-f",
"--force",
is_flag=True,
default=False,
help="rebuild wheels e