Skip to content

Instantly share code, notes, and snippets.

@rrbutani
Last active August 13, 2023 06:52
Show Gist options
  • Save rrbutani/5628aaea2a42bdff034dab5a9135d70c to your computer and use it in GitHub Desktop.
Save rrbutani/5628aaea2a42bdff034dab5a9135d70c to your computer and use it in GitHub Desktop.
find_package.bzl
"""Helper functions for repository rules that ensure labels are well-formed.
Labels constructed and used within repository rules do not have their name (the
part after `:`) checked against the package structure of the repo they point to;
they are allowed to refer to source artifacts via paths that do not respect the
package structure or visibility.
For example, for a source file that lives at `@foo//bar/baz:some/file/path`, all
of the following — when passed to repository context functions like `rctx.path`
or `rctx.read` — will refer to the file, without error:
- `@foo//:bar/baz/some/file/path`
- `@foo//bar:baz/some/file/path`
- `@foo//bar/baz:some/file/path`
However, the package path part of the label is still checked and so referring to
packages that do not exist is not allowed; i.e. these would fail:
- `@foo//bar/baz/some:file/path`
- `@foo//bar/baz/some/file:path`
This is useful in some contexts; for example when repository rules have a
repo-relative file path and do not know how the path corresponds to the package
structure of that repo.
However, only labels used from within a repository context implementation
function seem to enjoy this kind of permissive package path interpretation.
Labels that are used in `BUILD` files (even if they are generated from
repository rules or come from `.bzl` files) and even labels _passed_ to
repository rule invocations must obey the package structure.
This presents a problem for repository rules looking to do codegen: these rules
are able to read arbitrary files within repositories that have been loaded but
they cannot produce starlark that then _refers_ to these files.
To address this use case, this file provides helper functions that discover the
package structure by traversing the directories leading to a file and searching
for `BUILD`/`BUILD.bazel` files, ultimately rewriting package-structure-unaware
labels into well-formed labels that are valid for use everywhere (visibility,
however, is out of our hands; you will still get errors if artifacts have not
been exported).
"""
load(":strings.bzl", "error", "info")
def _check_for_build_files(path, build_file_lookup_cache):
    """Reports whether the directory `path` directly contains a BUILD file.

    Args:
        path: directory to inspect; must offer `get_child(name)` returning an
            object with an `exists` field.
        build_file_lookup_cache: optional dict mapping directories to earlier
            answers. When not `None` it is consulted first and updated with
            any freshly computed answer.

    Returns:
        The cached answer for `path` if there is one; otherwise whether
        `BUILD.bazel` or `BUILD` exists directly under `path`.
    """
    caching = build_file_lookup_cache != None
    if caching and path in build_file_lookup_cache:
        return build_file_lookup_cache[path]

    # `BUILD.bazel` is probed first; `BUILD` is only probed when it is absent.
    found = False
    for build_file_name in ("BUILD.bazel", "BUILD"):
        if path.get_child(build_file_name).exists:
            found = True
            break

    if caching:
        build_file_lookup_cache[path] = found
    return found
# Note: the repository rule this is invoked from will *not* be re-run on changes
# to the package structure!
#
# For example, if a `BUILD.bazel` file is created, invalidating existing
# `Label`s returned by this function, you will have to re-run your repository
# rule manually.
def fix_package_path(rctx, src_label, build_file_lookup_cache = None, sibling_label_in_repo = None):
    """Rewrites `src_label` so its package/name split matches the real package structure.

    The repo-relative path named by `src_label` (its package segments followed
    by its name segments) is walked upwards one directory at a time. The first
    directory found to contain a `BUILD.bazel` or `BUILD` file becomes the
    package of the returned label; the segments below it become its name.

    All failures are reported through `error` (loaded from `:strings.bzl`).
    The code below assumes that `error` aborts evaluation -- TODO confirm
    against that helper.

    Args:
        rctx: repository context of the calling repository rule; used for
            `rctx.path(...)` and `rctx.name`.
        src_label: the `Label` to fix up. Only the concatenation of its
            package and name is relied upon, not where the `:` sits.
        build_file_lookup_cache: optional dict used to memoize, per directory,
            whether a BUILD file was found. Pass the same dict across calls to
            share lookups; `None` disables memoization.
        sibling_label_in_repo: optional `Label` of any file in the same repo
            as `src_label`. When given, the repo root is derived from it and
            `src_label` itself is never handed to `rctx.path`. When omitted,
            `rctx.path(src_label)` is used and a warning is logged via `info`.

    Returns:
        A `Label` in the same repo as `src_label`, pointing at the same
        repo-relative path, whose package is the nearest enclosing directory
        that contains a BUILD file.
    """
    if type(src_label) != "Label":
        error("`src_label` must be a `Label`, got a {}", type(src_label))

    # Note: the usual `Label` restrictions still apply to `src_label` even
    # though we're in a repository rule:
    #   - targets cannot start with `/`
    #   - targets cannot contain `.` or `..` as a path segment
    #   - `\\` is not allowed as a separator
    #   - package paths cannot end with `/`
    #   - etc.
    # Since we have not asked for `path.realpath`, we should be able to safely
    # assume that the directory structure matches what is in `src_label`.
    #
    # First we gather the repo-relative path segments from our label:
    split_if_not_empty = lambda s: s.split("/") if len(s) != 0 else []
    get_segments = lambda label: (
        split_if_not_empty(label.package) +
        split_if_not_empty(label.name)
    )
    segments = get_segments(src_label)
    label_name_parts = []  # in reverse order!

    # Doing `rctx.path(src_label)` causes Bazel to register `src_label` as a
    # dependency of the repository this function is invoked from; i.e.: any
    # changes to `src_label`'s contents will cause the repo to be recreated.
    #
    # For use cases where this function is useful (i.e. generating lists of
    # labels from a file) this is often undesirable; the output of such
    # repository rules tends to not depend on the contents of files it is
    # creating labels to.
    #
    # Fortunately, `rctx.path(string)` does not register such a dependency. (see
    # this design doc for context:
    # https://docs.google.com/document/d/17RZKMuMjAIgNfFdrsLhqNsMTQDSS-BBA-S-fCSBPV7M/edit#heading=h.b6qpzslgwsw4)
    #
    # So, we'd like to get a path from `src_label`. We can figure out the repo
    # relative parts by extracting the package and name parts of the label but
    # unfortunately we still need to send a valid label through
    # `rctx.path(Label)` in order to map the `@repo` part to a path.
    #
    # This is what `sibling_label_in_repo` is for; we'll accept any valid Label
    # in the repo where `src_label` is from. Ideally this should be an
    # infrequently modified file, to minimize repository rule re-runs for the
    # caller.
    if sibling_label_in_repo != None:
        if type(sibling_label_in_repo) != "Label":
            error(
                "`sibling_label_in_repo` must be a `Label`, got a {}",
                type(sibling_label_in_repo),
            )
        if sibling_label_in_repo.workspace_name != src_label.workspace_name:
            error(
                """
`{}` is not a valid sibling label for `{}`
should be in repo `{}`; got `{}`
""",
                sibling_label_in_repo,
                src_label,
                src_label.workspace_name,
                sibling_label_in_repo.workspace_name,
            )
        sibling_path = rctx.path(sibling_label_in_repo)

        # Strip the sibling's own segments off its path, innermost first;
        # whatever directory remains is the repo root.
        repo_root = sibling_path
        for seg in get_segments(sibling_label_in_repo)[::-1]:
            if repo_root.basename != seg:
                error(
                    "expected `basename` of sibling path `{}` to be `{}`, got `{}`",
                    repo_root,
                    seg,
                    repo_root.basename,
                )
            parent = repo_root.dirname
            if parent == None:
                error(
                    "ran out of parent directories while walking up from sibling label `{}`",
                    sibling_label_in_repo,
                )
            repo_root = parent

        # append the repo-relative path to `src_label`
        path = repo_root.get_child(*segments)
    else:
        # fallback to using `rctx.path`
        info("""
warning: no sibling label provided, using `rctx.path` on `{}`
(this may cause spurious repository rule re-runs for `@{}`)
""", src_label, rctx.name)
        path = rctx.path(src_label)

    # In the `rctx.path(src_label)` fallback this should be unreachable, since
    # `rctx.path(...)` should already error for a missing file. With a sibling
    # label the path was assembled by hand above, so this is the only place a
    # missing file gets caught.
    if not path.exists:
        error("path `{}` for label `{}` does not exist", path, src_label)

    # Now walk up the directory tree that `path` is in until we see
    # `BUILD.bazel` or `BUILD`.
    #
    # Every iteration consumes exactly one segment, so `len(segments) + 1`
    # iterations always suffice: either a package is found, or the extra
    # iteration reports that no BUILD file exists anywhere above the file.
    # (`range` is evaluated once, before `segments` starts shrinking.)
    for _ in range(len(segments) + 1):  # stand in for `while True:`
        if len(segments) == 0:
            error("walked all the way up `{}`, found no BUILD files", src_label)
        last = segments.pop()
        base = path.basename
        if last != base:
            error("expected `basename` to be `{}`, got `{}`", last, base)
        label_name_parts.append(last)
        parent = path.dirname
        if parent == None:
            error("no parent dir for `{}` (while resolving `{}`)?", path, src_label)
        path = parent

        # Check for `BUILD.bazel` or `BUILD`:
        is_package = _check_for_build_files(path, build_file_lookup_cache)
        if is_package:
            break

    # Note: need to use `@@` for the workspace name in the label we yield
    # because `src_label.workspace_name` gives us the resolved label name; i.e.
    # `Label("@bazel_skylib//:all").workspace_name` would give us something like
    # `"bazel_skylib~1.4.2"` when using bzlmod.
    return Label("@@{repo}//{package_path}:{name}".format(
        repo = src_label.workspace_name,
        package_path = "/".join(segments),
        name = "/".join(label_name_parts[::-1]),
    ))
def find_package_path(rctx, src_label, build_file_lookup_cache = None, sibling_label_in_repo = None):
    """Returns the package path of the package that actually contains `src_label`'s file.

    Thin wrapper over `fix_package_path` that keeps only the `package` part of
    the corrected label.

    Args:
        rctx: repository context of the calling repository rule.
        src_label: `Label` whose package/name split may not match the repo's
            package structure.
        build_file_lookup_cache: optional dict forwarded to
            `fix_package_path` to memoize BUILD file lookups across calls.
        sibling_label_in_repo: optional `Label` of any file in the same repo
            as `src_label`, forwarded to `fix_package_path` so that
            `src_label` itself does not have to be passed to `rctx.path`.

    Returns:
        The `package` string of the label produced by `fix_package_path`.
    """
    return fix_package_path(
        rctx,
        src_label,
        build_file_lookup_cache = build_file_lookup_cache,
        sibling_label_in_repo = sibling_label_in_repo,
    ).package
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment