Created
July 14, 2020 21:53
-
-
Save alexeagle/9fd6684e9306cf741f246dd3518a48ec to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Read a poetry lock file and convert to a bazel dependency graph, | |
then write BUILD.bazel files for each installed dependency. | |
""" | |
import textwrap | |
import os | |
import re | |
import sys | |
def parse_direct_deps(buf):
    """
    Read the pyproject.toml file, which lists direct dependencies of the project.
    Note: avoid a dependency on a toml parser as it's hard to bootstrap deps.

    Args:
        buf: an open text file (any object with a ``readline()`` method)
            containing the pyproject.toml content.

    Returns:
        A list of the keys found under ``[tool.poetry.dependencies]`` and
        ``[tool.poetry.dev-dependencies]``, in file order, without ``python``.
        A name listed in both tables appears twice.
    """
    dependency_tables = (
        "[tool.poetry.dependencies]",
        "[tool.poetry.dev-dependencies]",
    )
    # A dependency entry looks like `key = ...` where the key is either bare or
    # quoted. Requiring this shape skips the continuation lines of multi-line
    # values (e.g. `{version = ...},` or `]`) instead of recording them as deps.
    dep_entry = re.compile(
        r"""\s*(?:"([^"]+)"|'([^']+)'|([A-Za-z0-9_-]+))\s*="""
    )

    deps = []
    in_dependency_table = False
    for line in iter(buf.readline, ""):
        stripped = line.strip()
        if stripped in dependency_tables:
            in_dependency_table = True
        elif line.startswith("["):
            # Any other table header ends the dependency section
            in_dependency_table = False
        elif stripped.startswith("#"):
            continue
        elif in_dependency_table:
            entry = dep_entry.match(line)
            if entry:
                # Exactly one of the three groups matched; quotes are dropped
                dep = entry.group(1) or entry.group(2) or entry.group(3)
                if dep != "python":
                    deps.append(dep)
    return deps
def parse_dep_graph(buf):
    """
    Read the poetry lock file, which lists the dependencies of each package.
    Note: avoid a dependency on a toml parser as it's hard to bootstrap deps.

    Args:
        buf: an open text file (any object with a ``readline()`` method)
            containing the poetry.lock content.

    Returns:
        A dict mapping each package name to the list of names it depends on.
        A dependency that has no ``[[package]]`` entry of its own still gets
        a key, with an empty list.
    """
    name_line = re.compile(r"\s*name\s=\s\"(.*)\"")
    toml_key = r"""(?:"([^"]+)"|'([^']+)'|([A-Za-z0-9_-]+))"""
    # `key = ...` inside [package.dependencies]; requiring this shape skips the
    # continuation lines of multi-line values instead of recording them as deps.
    dep_entry = re.compile(r"\s*" + toml_key + r"\s*=")
    # A dependency whose constraint is written as its own sub-table, e.g.
    # [package.dependencies.importlib-metadata]
    dep_table = re.compile(r"\[package\.dependencies\." + toml_key + r"\]$")

    dep_graph = {}
    pkg = None  # package whose section is being read; None until its name is seen
    state = "scan_package"
    for line in iter(buf.readline, ""):
        stripped = line.strip()
        found = None
        if stripped == "[[package]]":
            state = "scan_name"
            pkg = None
        elif stripped == "[package.dependencies]":
            state = "add_deps"
        elif line.startswith("["):
            found = dep_table.match(stripped)
            state = "scan_package"
        elif state == "scan_name":
            name_match = name_line.match(line)
            if name_match:
                pkg = name_match.group(1)
                dep_graph[pkg] = []
                state = "scan_deps"
        elif state == "add_deps":
            found = dep_entry.match(line)

        if found and pkg is not None:
            dep = found.group(1) or found.group(2) or found.group(3)
            dep_graph[pkg].append(dep)
            dep_graph.setdefault(dep, [])
    return dep_graph
def to_label(pkg):
    """
    Wrap pkg in double quotes as a label rooted at the workspace: "//pkg"
    """
    return '"//{}"'.format(pkg)
# Preamble written at the top of the generated BUILD.bazel files: it loads
# py_library and makes every target in the package publicly visible.
_HEADER = """\
# Generated by bazel/python/poetry/generate_build_files.py
# as part of poetry_install
load("@rules_python//python:defs.bzl", "py_library")
package(default_visibility = ["//visibility:public"])
"""
def generate_top_level_build(direct_deps):
    """
    Render the BUILD file that appears at the root of the @my_deps workspace.

    direct_deps is a list of lists of top-level names, one inner list per
    direct dependency. The result is the shared header followed by a single
    py_library named "all" that depends on every distinct name.
    """
    # Collapse the nested lists into one set of unique names
    unique_names = set()
    for names in direct_deps:
        unique_names.update(names)
    labels = ", ".join(to_label(name) for name in sorted(unique_names))
    all_target = textwrap.dedent(
        """\
        # This re-exports all of the direct dependencies listed in the pyproject.toml file
        py_library(name = "all", deps = [{}])
        """.format(labels)
    )
    return _HEADER + all_target
def generate_pkg_target(
    name, dependencies, pkg_content_kind, comment="Generated target"
):
    """
    Render one py_library target for the BUILD file that appears at the top of
    the installed packages at
    @my_deps/__sitepkgs__

    Args:
        name: the top-level import name; used as the target name and to build
            the srcs/data expressions.
        dependencies: a list of lists of target names this target depends on.
            Duplicates and references to ``name`` itself are dropped.
        pkg_content_kind: how the package is laid out on disk, one of
            "dir", "py_file", "so_file" or "empty".
        comment: text for the comment line above the target. Ignored when
            pkg_content_kind is "empty", which uses a fixed warning instead.

    Returns:
        The BUILD file snippet as a string.

    Raises:
        ValueError: if pkg_content_kind is not one of the known kinds.
    """
    # (srcs, data) expressions for each layout
    attrs_by_kind = {
        "dir": (
            """glob(["{name}/**/*.py"], allow_empty = True)""",
            # Workaround bazelbuild/bazel#4327 Runfiles: support paths with spaces
            """glob(["{name}/**/*"], exclude=["{name}/**/*.py", "{name}/**/* *"])""",
        ),
        "py_file": ("""["{name}.py"]""", "[]"),
        "so_file": ("[]", """glob(["{name}.*.so"])"""),
        "empty": ("[]", "[]"),
    }
    if pkg_content_kind not in attrs_by_kind:
        raise ValueError("unknown package content kind", pkg_content_kind)
    if pkg_content_kind == "empty":
        comment = (
            "No files were found for this package. It might indicate a bug."
        )
    srcs_template, data_template = attrs_by_kind[pkg_content_kind]

    # Flatten and de-duplicate dependencies
    deps = {d for group in dependencies for d in group}
    dep_labels = ", ".join('"%s"' % d for d in sorted(deps) if d != name)

    # Every value is substituted in a single pass, so braces inside `comment`
    # (or any other value) are emitted literally rather than being parsed as
    # format fields.
    return textwrap.dedent(
        """\
        # {comment}
        py_library(
            name = "{name}",
            srcs = {srcs},
            data = {data},
            imports = ["."],
            deps = [{dependencies}],
        )
        """
    ).format(
        comment=comment,
        name=name,
        srcs=srcs_template.format(name=name),
        data=data_template.format(name=name),
        dependencies=dep_labels,
    )
def generate_alias_build(top_level, alias_target):
    """
    Render the BUILD file for the package-specific vanity location
    @my_deps/pkg
    which lets users dep on '@my_deps//pkg' rather than '@my_deps//__sitepkgs__/pkg'

    top_level becomes the alias name and alias_target the label it points at.
    """
    alias_rule = textwrap.dedent(
        """\
        # Convenience alias so you can dep on @my_deps//pkg
        alias(name = "{name}", actual = "{actual}")
        """.format(actual=alias_target, name=top_level)
    )
    return _HEADER + alias_rule
def top_level_names(pkg, site_packages):
    """
    Find out what top-level names the package can be imported with
    For example, .venv/lib/python3.8/site-packages/python_dateutil-2.8.1.dist-info/top_level.txt
    tells us that the package is imported as "dateutil"

    Args:
        pkg: the distribution name as written in pyproject.toml / poetry.lock.
        site_packages: path of the directory holding the installed packages.

    Returns:
        The names listed in the first matching top_level.txt, excluding blank
        lines, deep imports containing "/" and names starting with "_".
        Falls back to ``[pkg]`` (with a warning on stdout) when no
        top_level.txt is found for the package.
    """

    def normalize(name):
        # Treat runs of "-", "_" and "." as equivalent and ignore case, so
        # "zope.interface", "zope-interface" and "Zope_Interface" all match
        # whichever spelling the installer used for the metadata directory.
        return re.sub(r"[-_.]+", "_", name).lower()

    wanted = normalize(pkg)
    for entry in os.listdir(site_packages):
        if not entry.endswith((".dist-info", ".egg-info")):
            continue
        # Metadata directories are named <name>-<version>...; the name part
        # never contains "-", so everything before the first one is the name.
        dist_name, dash, _ = entry.partition("-")
        if not dash or normalize(dist_name) != wanted:
            continue
        top_level_path = os.path.join(site_packages, entry, "top_level.txt")
        if not os.path.exists(top_level_path):
            continue
        with open(top_level_path) as top_level_file:
            names = [line.strip() for line in top_level_file]
        # Filter out deep imports like googleapiclient/discovery_cache
        # Also filter out names prefixed with underscore, and blank lines,
        # which would otherwise produce an empty import name
        return [
            name
            for name in names
            if name and "/" not in name and not name.startswith("_")
        ]
    print(
        "WARNING:",
        pkg,
        "has no top-level.txt in its distribution, assuming it is imported as",
        pkg,
    )
    return [pkg]
def main(argv): | |
if len(argv) < 3: | |
print( | |
"Usage: generate_build_files.py path/to/pyproject.toml path/to/poetry.lock path/to/installed/site_packages", | |
file=sys.stderr, | |
) | |
return 1 | |
[toml_file, lock_file, site_packages] = argv | |
with open(toml_file) as toml: | |
direct_deps = parse_direct_deps(toml) | |
with open(lock_file) as lock: | |
dep_graph = parse_dep_graph(lock) | |
with open('BUILD.bazel', 'w') as top_build: | |
top_build.write( | |
generate_top_level_build( | |
[top_level_names(dep, site_packages) for dep in direct_deps] | |
) | |
) | |
# What top-level imports are possible from this site_packages? | |
top_importable = os.listdir(site_packages) | |
with open(os.path.join(site_packages, "BUILD.bazel"), 'w') as pkgs_build: | |
pkgs_build.write(_HEADER) | |
for [pkg, deps] in dep_graph.items(): | |
for top_level in top_level_names(pkg, site_packages): | |
# Multiple packages can amend the same top-level import | |
if not os.path.exists(top_level): | |
os.mkdir(top_level) | |
with open( | |
"%s/BUILD.bazel" % top_level, 'w' | |
) as alias_build: | |
alias_target = "//%s:%s" % (site_packages, top_level) | |
alias_build.write( | |
generate_alias_build(top_level, alias_target) | |
) | |
# Some packages get installed as single file instead of directory | |
# We just have to look on disk to see what poetry decided to do | |
pkg_content_kind = None | |
if top_level in top_importable: | |
pkg_content_kind = "dir" | |
elif top_level + ".py" in top_importable: | |
pkg_content_kind = "py_file" | |
elif any( | |
[ | |
i.startswith(top_level + ".") and i.endswith(".so") | |
for i in top_importable | |
] | |
): | |
# for example pvectorc.cpython-38-darwin.so | |
pkg_content_kind = "so_file" | |
else: | |
pkg_content_kind = "empty" | |
if pkg_content_kind: | |
pkgs_build.write( | |
generate_pkg_target( | |
top_level, | |
# TODO: we only look at the deps for the first package that | |
# contributed to top_level, but it's possible that other packages | |
# with additional deps also contribute to the same top_level | |
[ | |
top_level_names(d, site_packages) | |
for d in deps | |
], | |
pkg_content_kind, | |
"Generated from " + pkg, | |
) | |
) | |
return 0 | |
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"Repository rule to run poetry to install dependencies during WORKSPACE loading" | |
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") | |
# Attributes accepted by the poetry_install repository rule.
# Only `pyproject` is mandatory.
_ATTRS = {
    "pyproject": attr.label(doc = "The pyproject.toml file to install", mandatory = True),
    "poetry_lock": attr.label(doc = "The poetry.lock file"),
    "quiet": attr.bool(doc = "Whether to print the output from poetry"),
    "timeout": attr.int(
        default = 3600,
        doc = "Maximum duration of the package manager execution in seconds.",
    ),
    "python_interpreter": attr.label(
        doc = "A python interpreter to run poetry under",
        default = "@python_interpreter//:python_bin",
    ),
}
# When you run the poetry installer, it creates this BIN entry for your $PATH
# From https://github.com/python-poetry/poetry/blob/1.0.9/get-poetry.py#L200-L218
# And slightly adapted for the lib path which differs in the poetry distribution
#
# This is a format template: the single `{}` is replaced by _impl with the path
# to poetry's __main__.py, while `{{}}` is an escaped literal `{}` that survives
# into the generated script for its own .format() call.
_POETRY_BIN = """
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import glob
import sys
import os
lib = os.path.normpath(os.path.join(os.path.realpath("{}"), "../.."))
vendors = os.path.join(lib, "_vendor")
current_vendors = os.path.join(
    vendors, "py{{}}".format(".".join(str(v) for v in sys.version_info[:2]))
)
sys.path.insert(0, lib)
sys.path.insert(0, current_vendors)
if __name__ == "__main__":
    from poetry.console import main
    main()
"""
def _impl(repository_ctx):
    """Runs `poetry install` in the repository directory and generates BUILD files.

    Steps:
      1. Write a poetry launcher script and symlink pyproject.toml, poetry.lock
         (when provided) and generate_build_files.py into the repository.
      2. Run `poetry install` with an in-project virtualenv (.venv).
      3. Symlink the virtualenv's site-packages to `__sitepkgs__`.
      4. Run generate_build_files.py over the result.

    Fails the repository rule if any executed command returns non-zero.

    Args:
      repository_ctx: the repository rule context.
    """
    py_interpreter = repository_ctx.path(repository_ctx.attr.python_interpreter)
    poetry_main = repository_ctx.path(Label("@poetry//:__main__.py"))

    # Lay out the working directory (output_base/external/users_deps) so that poetry
    # runs in a project environment it expects.
    repository_ctx.file("_poetry_bin.py", content = _POETRY_BIN.format(poetry_main))
    repository_ctx.symlink(repository_ctx.attr.pyproject, "pyproject.toml")

    # poetry_lock is not a mandatory attribute. Without a lock file
    # `poetry install` resolves the dependencies and writes poetry.lock itself,
    # so only symlink one when it was actually provided.
    if repository_ctx.attr.poetry_lock:
        repository_ctx.symlink(repository_ctx.attr.poetry_lock, "poetry.lock")
    repository_ctx.symlink(Label("//bazel/python/poetry:generate_build_files.py"), "generate_build_files.py")

    poetry_args = ["install", "--no-interaction", "--no-ansi"]

    # OPT: we could expose environment as an attribute
    poetry_env = {
        # Bazel will keep its own copy of the venv so we can write BUILD files there
        # https://python-poetry.org/docs/configuration/#virtualenvsin-project-boolean
        "POETRY_VIRTUALENVS_IN_PROJECT": "true",
        # TODO: maybe we don't want to create virtualenvs at all? need to understand how they are used
        # but if we do this, where does poetry put the downloaded packages?
        # "POETRY_VIRTUALENVS_CREATE": "false",
    }

    repository_ctx.report_progress("Running poetry install on %s" % repository_ctx.attr.pyproject)
    result = repository_ctx.execute(
        [py_interpreter, "_poetry_bin.py"] + poetry_args,
        timeout = repository_ctx.attr.timeout,
        quiet = repository_ctx.attr.quiet,
        environment = poetry_env,
    )
    if result.return_code:
        fail("poetry_install failed:\nSTDOUT:\n%s\nSTDERR:\n%s" % (result.stdout, result.stderr))

    # Ask the virtualenv's own interpreter where its site-packages lives, rather
    # than hard-coding a "python3.8" path segment that is only right for one
    # interpreter version.
    result = repository_ctx.execute([
        repository_ctx.path(".venv/bin/python"),
        "-c",
        "import sysconfig; print(sysconfig.get_paths()['purelib'])",
    ])
    if result.return_code:
        fail("poetry_install could not locate site-packages in .venv:\nSTDOUT:\n%s\nSTDERR:\n%s" % (result.stdout, result.stderr))
    site_packages = result.stdout.strip()

    # rules_python doesn't allow hyphens anywhere in the path when referencing files.
    # So we symlink the site-packages to sitepkgs for bazel labels to be unaware of the hyphen
    # See https://github.com/bazelbuild/bazel/issues/9171
    repository_ctx.symlink(repository_ctx.path(site_packages), "__sitepkgs__")

    repository_ctx.report_progress("Processing site-packages: generating BUILD files")
    result = repository_ctx.execute([
        py_interpreter,
        "generate_build_files.py",
        repository_ctx.path("pyproject.toml"),
        repository_ctx.path("poetry.lock"),
        "__sitepkgs__",
    ])
    if result.return_code:
        fail("generate_build_files.py failed:\nSTDOUT:\n%s\nSTDERR:\n%s" % (result.stdout, result.stderr))
# The repository rule itself. It is invoked by the poetry_install macro in this
# file, which first declares the @poetry archive that _impl refers to.
poetry_install_rule = repository_rule(
    implementation = _impl,
    attrs = _ATTRS,
)
def poetry_install(**kwargs):
    """Declares the @poetry archive if needed, then instantiates poetry_install_rule.

    Args:
      **kwargs: forwarded unchanged to poetry_install_rule.
    """
    poetry_archive = dict(
        sha256 = "073b2e557f4a53605da6009a8b3585de00ffef4bfece4dfc3d974b8e5f00d481",
        strip_prefix = "poetry",
        # Add a build file here, just to produce a label that we can reference from the repository rule
        # so we can determine the path to poetry library
        build_file_content = """exports_files(["__main__.py"])""",
        # FIXME: need linux URL also, see rules_nodejs node_repositories#_download_node for ideas.
        # Also it needs to be a toolchain so a docker container gets a linux python interpreter
        # TODO: also understand whether this becomes a cache key for something
        # and whether it needs a toolchain for cross-compile??
        urls = ["https://github.com/python-poetry/poetry/releases/download/1.0.9/poetry-1.0.9-darwin.tar.gz"],
    )

    # Use a maybe so this only runs the first time poetry_install is called
    _maybe(http_archive, name = "poetry", **poetry_archive)

    poetry_install_rule(**kwargs)
def _maybe(repo_rule, name, **kwargs):
    """Calls repo_rule(name = name, **kwargs) unless a rule called `name` already exists.

    Args:
      repo_rule: the repository rule (or macro) to call.
      name: name of the repository to declare.
      **kwargs: remaining arguments forwarded to repo_rule.
    """
    if name in native.existing_rules():
        return
    repo_rule(name = name, **kwargs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment