Last active
December 4, 2023 13:15
-
-
Save pradyunsg/22ca089b48ca55d75ca843a5946b2691 to your computer and use it in GitHub Desktop.
Figuring out the top-level importable names from a wheel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Takes a .whl file and figures out the top-level importable names in that wheel. | |
Usage: | |
$ python find-top-level-from-wheel-file.py ./setuptools-65.4.1-py3-none-any.whl | |
['_distutils_hack', 'pkg_resources', 'setuptools'] | |
Testing: | |
$ pytest find-top-level-from-wheel-file.py | |
... | |
===== 2 passed in 0.01s ===== | |
x-ref: https://github.com/PyO3/maturin/issues/1154#issuecomment-1264498648 | |
""" | |
# Licensed under the MIT license. | |
# | |
# Copyright (c) 2022 Pradyun Gedam <[email protected]> | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the “Software”), to deal | |
# in the Software without restriction, including without limitation the rights | |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
# copies of the Software, and to permit persons to whom the Software is | |
# furnished to do so, subject to the following conditions: | |
# | |
# The above copyright notice and this permission notice shall be included in | |
# all copies or substantial portions of the Software. | |
# | |
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE | |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
# SOFTWARE. | |
import json | |
import sys | |
from collections import deque | |
from typing import Iterable | |
from installer.sources import WheelSource | |
from installer.utils import parse_metadata_file | |
def _find_importable_components_from_wheel_content_listing( | |
filepaths: Iterable[str], *, dist_info_dir: str, data_dir: str | |
) -> Iterable[tuple[str, ...]]: | |
purelib_str = f"{data_dir}/purelib/" | |
platlib_str = f"{data_dir}/platlib/" | |
for path in filepaths: | |
if path.startswith(dist_info_dir): | |
# Nothing in dist-info is importable. | |
continue | |
if path.startswith((platlib_str, purelib_str)): | |
# Remove the prefix from purelib and platlib files. | |
name = path[len(platlib_str) :] | |
elif path.startswith(data_dir): | |
# Nothing else in data is importable. | |
continue | |
else: | |
# Top level files end up in an importable location. | |
name = path | |
if name.endswith(".py"): | |
yield tuple(name[: -len(".py")].split("/")) | |
def test_find_importable_components_from_wheel_content_listing():
    """Only top-level, purelib and platlib ``.py`` files are reported."""
    # GIVEN a listing mixing importable and non-importable locations
    listing = [
        "zero.py",
        "foo.data/purelib/one.py",
        "foo.data/purelib/two/three.py",
        "foo.data/platlib/four.py",
        "foo.data/platlib/five/six.py",
        "foo.data/scripts/six.py",
        "foo.data/scripts/seven/eight.py",
        "foo.dist-info/nine.py",
    ]
    # The scripts and dist-info entries must be absent from the output.
    expected = [
        ("zero",),
        ("one",),
        ("two", "three"),
        ("four",),
        ("five", "six"),
    ]

    # WHEN
    found = list(
        _find_importable_components_from_wheel_content_listing(
            listing, data_dir="foo.data", dist_info_dir="foo.dist-info"
        )
    )

    # THEN
    assert found == expected
def _determine_major_import_names( | |
importable_components: Iterable[tuple[str, ...]] | |
) -> Iterable[str]: | |
# If you literally want the "top level", just do... | |
# return {components[0] for components in importable_components} | |
# Here, we're going to try to find the longest initial import name instead. | |
# Mostly, because this was a fun problem to thing through. | |
# Build a tree out of the components | |
tree = {} | |
for components in importable_components: | |
subtree = tree | |
for segment in components: | |
if segment not in subtree: | |
subtree[segment] = {} | |
subtree = subtree[segment] | |
# Recurse through the tree to find the names which have != 1 children. | |
queue = deque() | |
queue.appendleft((tree, ())) | |
while queue: | |
current_tree, current_name = queue.popleft() | |
for name, subtree in current_tree.items(): | |
subname = (*current_name, name) | |
if len(subtree) == 1: | |
queue.append((subtree, subname)) | |
elif name == "__init__": | |
yield ".".join(current_name) | |
else: | |
yield ".".join(subname) | |
def test_determine_major_import_names():
    """Each top-level tree collapses to its longest unambiguous name."""
    # GIVEN single modules, packages, and chains that branch at various depths
    given = [
        ("zero",),
        ("one", "__init__"),
        ("two",),
        ("two", "three"),
        ("two", "four"),
        ("five", "six", "seven"),
        ("five", "six", "eight"),
        ("nine", "ten", "__init__"),
        ("eleven", "twelve", "__init__"),
        ("eleven", "twelve", "thirteen", "__init__"),
        ("eleven", "twelve", "fourteen", "__init__"),
    ]
    expected_names = {
        "zero",
        "one",
        "two",
        "five.six",
        "nine.ten",
        "eleven.twelve",
    }

    # WHEN
    names = set(_determine_major_import_names(given))

    # THEN (order is irrelevant, hence the set comparison)
    assert names == expected_names
def find_major_import_import_names(wheel: WheelSource) -> Iterable[str]:
    """Return the major import names provided by *wheel*.

    The wheel's ``WHEEL`` dist-info file is parsed first. The file listing
    from ``wheel.get_contents()`` is then filtered down to importable
    components and collapsed into dotted import names.

    The returned iterable is lazy: the listing is only walked once the
    caller starts iterating.

    Raises:
        NotImplementedError: If ``Wheel-Version`` is missing, empty, or does
            not start with ``1.``.
    """
    wheel_metadata = parse_metadata_file(wheel.read_dist_info("WHEEL"))
    wheel_version = wheel_metadata["Wheel-Version"]
    if not wheel_version or not wheel_version.startswith("1."):
        raise NotImplementedError("Only supports wheel 1.x")

    # Each content entry is a 3-tuple; the first item of its first element
    # is used as the file's path.
    record_paths: Iterable[str] = (
        record[0] for record, _, _ in wheel.get_contents()
    )
    return _determine_major_import_names(
        _find_importable_components_from_wheel_content_listing(
            record_paths,
            dist_info_dir=wheel.dist_info_dir,
            data_dir=wheel.data_dir,
        )
    )
if __name__ == "__main__":
    import zipfile

    from installer.sources import WheelFile

    # Give a usage hint rather than a bare IndexError traceback when the
    # wheel path argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <path-to-wheel>")

    path: str = sys.argv[1]
    with zipfile.ZipFile(path) as archive:
        wheel_file = WheelFile(archive)
        # The result is materialised with list() inside the `with` block,
        # i.e. while the archive is still open.
        print(list(find_major_import_import_names(wheel_file)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment