Last active
August 13, 2023 06:52
-
-
Save rrbutani/5628aaea2a42bdff034dab5a9135d70c to your computer and use it in GitHub Desktop.
find_package.bzl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Helper functions for repository rules that ensure labels are well-formed.

Labels constructed and used within repository rules do not have their name (the
part after `:`) checked against the package structure of the repo they point to;
they are allowed to refer to source artifacts via paths that do not respect the
package structure or visibility.

For example, for a source file that lives at `@foo//bar/baz:some/file/path`, all
of the following — when passed to repository context functions like `rctx.path`
or `rctx.read` — will refer to the file, without error:
  - `@foo//:bar/baz/some/file/path`
  - `@foo//bar:baz/some/file/path`
  - `@foo//bar/baz:some/file/path`

However, the package path part of the label is still checked and so referring to
packages that do not exist is not allowed; i.e. these would fail:
  - `@foo//bar/baz/some:file/path`
  - `@foo//bar/baz/some/file:path`

This is useful in some contexts; for example when repository rules have a
repo-relative file path and do not know how the path corresponds to the package
structure of that repo.

However, only labels used from within a repository context implementation
function seem to enjoy this kind of permissive package path interpretation.
Labels that are used in `BUILD` files (even if they are generated from
repository rules or come from `.bzl` files) and even labels _passed_ to
repository rule invocations must obey the package structure.

This presents a problem for repository rules looking to do codegen: these rules
are able to read arbitrary files within repositories that have been loaded but
they cannot produce starlark that then _refers_ to these files.

To address this use case, this file provides helper functions that discover the
package structure by traversing the directories leading to a file and searching
for `BUILD`/`BUILD.bazel` files, ultimately rewriting package-structure-unaware
labels into well-formed labels that are valid for use everywhere (visibility,
however, is out of our hands; you will still get errors if artifacts have not
been exported).
"""
load(":strings.bzl", "error", "info") | |
def _check_for_build_files(path, build_file_lookup_cache):
    """Reports whether the directory at `path` holds a `BUILD.bazel` or `BUILD`.

    Args:
        path: directory to probe; must provide `get_child(name)`, whose result
            exposes an `exists` attribute.
        build_file_lookup_cache: optional dict mapping already-probed paths to
            their results. It is consulted before probing and updated
            afterwards; pass `None` to disable caching.

    Returns:
        The cached entry for `path` if there is one, otherwise whether either
        build file exists directly under `path`.
    """
    use_cache = build_file_lookup_cache != None
    if use_cache and path in build_file_lookup_cache:
        return build_file_lookup_cache[path]

    # `BUILD.bazel` is probed first; `BUILD` is only consulted when it is absent.
    has_build_file = path.get_child("BUILD.bazel").exists
    if not has_build_file:
        has_build_file = path.get_child("BUILD").exists

    if use_cache:
        build_file_lookup_cache[path] = has_build_file
    return has_build_file
# Note: the repository rule this is invoked from will *not* be re-run on changes
# to the package structure!
#
# For example, if a `BUILD.bazel` file is created, invalidating existing
# `Label`s returned by this function, you will have to re-run your repository
# rule manually.
def fix_package_path(rctx, src_label, build_file_lookup_cache = None, sibling_label_in_repo = None):
    """Rewrites `src_label` so that its package part names a real package.

    Walks up the directories leading to the file `src_label` points at until a
    directory containing `BUILD.bazel` or `BUILD` is found; that directory
    becomes the package and the remaining trailing segments become the name.

    Problems are reported through `error` (loaded from `:strings.bzl`), which
    is presumably fatal — confirm against that file.

    Args:
        rctx: repository context; only `rctx.path` and `rctx.name` are used.
        src_label: `Label` of a file whose package/name split may not follow
            the package structure of its repo.
        build_file_lookup_cache: optional dict used to memoize per-directory
            build file probes across calls; `None` disables caching.
        sibling_label_in_repo: optional `Label` of any file in the same repo
            as `src_label`. When given, it is the only label passed to
            `rctx.path`, so `src_label` itself is not registered as a
            dependency of the calling repository rule.

    Returns:
        A `Label` in the same repo as `src_label` (spelled with the `@@`
        prefix) whose package is the nearest enclosing directory that has a
        build file.
    """
    if type(src_label) != "Label":
        error("`src_label` must be a `Label`, got a {}", type(src_label))

    # Note: the usual `Label` restrictions still apply to `src_label` even
    # though we're in a repository rule:
    #   - targets cannot start with `/`
    #   - targets cannot contain `.` or `..` as a path segment
    #   - `\\` is not allowed as a separator
    #   - package paths cannot end with `/`
    #   - etc.

    # Since we have not asked for `path.realpath`, we should be able to safely
    # assume that the directory structure matches what is in `src_label`.
    #
    # First we gather the repo-relative path segments from our label:
    split_if_not_empty = lambda s: s.split("/") if len(s) != 0 else []
    get_segments = lambda label: (
        split_if_not_empty(label.package) +
        split_if_not_empty(label.name)
    )

    segments = get_segments(src_label)
    label_name_parts = []  # in reverse order!

    # Doing `rctx.path(src_label)` causes Bazel to register `src_label` as a
    # dependency of the repository this function is invoked from; i.e.: any
    # changes to `src_label`'s contents will cause the repo to be recreated.
    #
    # For use cases where this function is useful (i.e. generating lists of
    # labels from a file) this is often undesirable; the output of such
    # repository rules tends to not depend on the contents of files it is
    # creating labels to.
    #
    # Fortunately, `rctx.path(string)` does not register such a dependency. (see
    # this design doc for context:
    # https://docs.google.com/document/d/17RZKMuMjAIgNfFdrsLhqNsMTQDSS-BBA-S-fCSBPV7M/edit#heading=h.b6qpzslgwsw4)
    #
    # So, we'd like to get a path from `src_label`. We can figure out the repo
    # relative parts by extracting the package and name parts of the label but
    # unfortunately we still need to send a valid label through
    # `rctx.path(Label)` in order to map the `@repo` part to a path.
    #
    # This is what `sibling_label_in_repo` is for; we'll accept any valid Label
    # in the repo where `src_label` is from. Ideally this should be an
    # infrequently modified file, to minimize repository rule re-runs for the
    # caller.
    if sibling_label_in_repo != None:
        if type(sibling_label_in_repo) != "Label":
            error(
                "`sibling_label_in_repo` must be a `Label`, got a {}",
                type(sibling_label_in_repo),
            )
        if sibling_label_in_repo.workspace_name != src_label.workspace_name:
            error(
                "\n`{}` is not a valid sibling label for `{}`\n" +
                "should be in repo `{}`; got `{}`\n",
                sibling_label_in_repo,
                src_label,
                src_label.workspace_name,
                sibling_label_in_repo.workspace_name,
            )

        # Call `dirname` once per segment of the sibling label (innermost
        # first) until we reach the repo root:
        repo_root = rctx.path(sibling_label_in_repo)
        for seg in get_segments(sibling_label_in_repo)[::-1]:
            if repo_root.basename != seg:
                error(
                    "expected `basename` to be `{}`, got `{}`",
                    seg,
                    repo_root.basename,
                )
            repo_root = repo_root.dirname
            if repo_root == None:
                error("hit root in sibling")

        # Append the repo-relative path of `src_label` to the repo root:
        path = repo_root.get_child(*segments)
    else:
        # Fall back to using `rctx.path` on `src_label` itself:
        info(
            "\nwarning: no sibling label provided, using `rctx.path` on `{}`\n" +
            "(this may cause spurious repository rule re-runs for `@{}`)\n",
            src_label,
            rctx.name,
        )
        path = rctx.path(src_label)

    # In the fallback branch `rctx.path(...)` should already have errored for a
    # missing file, but a path assembled with `get_child` is not otherwise
    # checked, so verify existence for both branches.
    if not path.exists:
        error("path `{}` for label `{}` does not exist", path, src_label)

    # Now walk up the directory tree that `path` is in until we see
    # `BUILD.bazel` or `BUILD`.
    #
    # This is a bounded stand-in for `while True:`. Each iteration pops one
    # segment, so `len(segments) + 1` iterations always suffice; the extra
    # iteration is the one that reports that no build file was found.
    for _ in range(len(segments) + 1):
        if len(segments) == 0:
            error("walked all the way up `{}`, found no BUILD files", src_label)

        last = segments.pop()
        base = path.basename
        if last != base:
            error("expected `basename` to be `{}`, got `{}`", last, base)

        label_name_parts.append(last)
        path = path.dirname
        if path == None:
            error("no parent dir?")

        # Check for `BUILD.bazel` or `BUILD`:
        if _check_for_build_files(path, build_file_lookup_cache):
            break

    # Note: need to use `@@` for the workspace name in the label we yield
    # because `src_label.workspace_name` gives us the resolved label name; i.e.
    # `Label("@bazel_skylib//:all").workspace_name` would give us something like
    # `"bazel_skylib~1.4.2"` when using bzlmod.
    return Label("@@{repo}//{package_path}:{name}".format(
        repo = src_label.workspace_name,
        package_path = "/".join(segments),
        name = "/".join(label_name_parts[::-1]),
    ))
def find_package_path(rctx, src_label, build_file_lookup_cache = None, sibling_label_in_repo = None):
    """Returns the package path of the package that actually contains `src_label`.

    Thin wrapper around `fix_package_path` that yields only the `package` part
    of the rewritten label.

    Args:
        rctx: repository context, forwarded to `fix_package_path`.
        src_label: `Label` of the file whose enclosing package is wanted.
        build_file_lookup_cache: optional dict forwarded to
            `fix_package_path` to memoize build file probes; `None` disables
            caching.
        sibling_label_in_repo: optional `Label` in the same repo as
            `src_label`, forwarded to `fix_package_path` so that `src_label`
            itself does not have to be passed to `rctx.path`.

    Returns:
        The `package` attribute of the label produced by `fix_package_path`.
    """
    return fix_package_path(
        rctx,
        src_label,
        build_file_lookup_cache = build_file_lookup_cache,
        sibling_label_in_repo = sibling_label_in_repo,
    ).package
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment