Last active
November 19, 2024 16:42
-
-
Save Sachaa-Thanasius/f9b484299267ff3e893720c823b5c8cf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ruff: noqa: ERA001, RUF012, ANN001, ANN206
"""A proof of concept for a simple, ~50 LOC (excluding docstrings/comments)
lazy-imports-enabling context manager to be added to and used within the standard library.

This would fulfill common use cases both inside and outside the Python standard library,
e.g. inline imports and if TYPE_CHECKING blocks for typing/annotation-related imports.
However, it can't currently work without fixing a few things upstream.

Problems with LazyLoader
------------------------
importlib.util.LazyLoader is core to the functionality of this context manager, but it has
some issues that need fixing:

1. Suppose a module was imported with _LazyFinder, but wasn't used. Then, it was
   imported again. During the latter import, the module will be fully resolved
   immediately, no longer being lazy. This is because importlib.util._LazyModule doesn't
   special-case "__spec__" being requested as an attribute, and the regular import
   machinery requests that while doing its search for a cached module in sys.modules.
   This makes usage of LazyLoader via a context manager like below difficult in practice,
   since many modules are meant to be imported in multiple modules in the same project,
   even standard library ones like typing.

2. If threading was not imported before placing this finder on the meta path, this
   would fail in 3.12+ because threading is imported within LazyLoader.exec_module
   (for RLock); things become circular. This is problematic because it can place a hard
   requirement to import threading (and whatever it imports) before using LazyLoader
   with a customized meta path finder, as demonstrated below.

(Potential) Solutions for LazyLoader
------------------------------------
1. Have an early branch within _LazyModule.__getattribute__ that returns __spec__
   upon request *without* doing the loading, a la::

        if attr == "__spec__":
            return __spec__

2. A few ideas came to mind:

   a. Some other users of LazyLoader, e.g. Mercurial, work around this by placing
      "threading" on an exclusion list?
      (see https://repo.mercurial-scm.org/hg/file/tip/hgdemandimport/__init__.py)
      I can't get it to work, plus it makes the basic recipe less simple by needing
      visible special-casing from the start. However, maybe such a special-casing
      interface should be exposed to users anyway (see _LazyFinder's docstring for
      more details).

   b. If we try to work around this by finding threading's spec beforehand so that we
      use its default loader to load it when LazyLoader.exec_module is called, that
      might cause issues because then all the import statements within threading will
      trigger with _LazyFinder still on the path. Doesn't work.

   c. If we use _thread for its RLock instead, that *might* work, but it also might
      cause issues with gevent (see https://github.com/python/cpython/issues/117983).
      _thread is used by importlib._bootstrap though, and gevent manages with that,
      so ... this might be the easiest solution.
"""
from __future__ import annotations | |
import importlib.machinery | |
import importlib.util | |
import sys | |
# NOTE: Issues with LazyLoader, 2. | |
# To demonstrate, attempt to run this file before and after uncommenting the below import. | |
# import threading | |
class _LazyFinder:
    """A module spec finder that wraps a spec's loader, if it exists, with LazyLoader.

    It uses the rest of the meta path to actually find the spec.

    Class Attributes
    ----------------
    excluded_modules: set[str]
        Module names that will not be lazily loaded.

        Plenty of reasons exist for this escape hatch beyond the threading-related one
        mentioned above. For example, many code bases use the following pattern to
        branch based on the successful import of a module::

            try:
                import ...  # Attempt an import
            except ...:
                # If the import fails, do something else.
                ...

        The downside of lazily importing is that the except clause won't trigger in
        circumstances where the module is found but raises an exception
        *during execution*; that execution is delayed until first attribute access, which
        if we're unlucky, will happen during user-visible "runtime".

        This common pattern is at least one reason why PEP 690 tried to specify try/except
        and with blocks as escape hatches within which import statements are always eager.
        We could technically figure out if we're in a try-except/with block by traversing
        the call stack and inspecting the surrounding bytecode, but that's a) expensive,
        b) Python runtime/implementation dependent, and c) potentially brittle even if
        specifically targeting one implementation.

    excluded_loaders: tuple[type, ...]
        Loader types that will not be wrapped.

        C API functions like PyModule_Get* might try to access C state that isn't
        initialized because _LazyModule.__getattribute__ hasn't been triggered.
        According to Brett Cannon, the simplest solution is to skip being lazy for
        non-source modules on which those C API functions can be called
        (see https://github.com/python/cpython/issues/85963). We can identify those via
        their intended loaders, which is why excluded_loaders is populated by default with
        BuiltinImporter and ExtensionFileLoader.

    Notes
    -----
    The class attributes are late additions that aren't fully fleshed out. The intention
    is to eventually provide interfaces so that these collections are modifiable by users
    for their use cases, e.g. methods that temporarily add/subtract from them.
    """

    excluded_modules: set[str] = set()
    excluded_loaders = (
        importlib.machinery.BuiltinImporter,
        importlib.machinery.ExtensionFileLoader,
    )

    @classmethod
    def _is_excluded_loader(cls, loader) -> bool:
        """Return True if "loader" is, or is an instance of, an excluded loader type.

        Some importers (e.g. BuiltinImporter) only define class methods, so the import
        system stores the *class itself* in spec.loader rather than an instance. A plain
        isinstance() check never matches in that case, hence the issubclass() branch.
        """
        if isinstance(loader, type):
            return issubclass(loader, cls.excluded_loaders)
        return isinstance(loader, cls.excluded_loaders)

    @classmethod
    def find_spec(cls, fullname: str, path=None, target=None, /):  # pyright: ignore
        """Try to find a spec for "fullname" using the other finders on the meta path.

        The first non-None spec returned by another finder is used. Its loader is wrapped
        with importlib.util.LazyLoader unless the module name is in excluded_modules, the
        spec has no loader, or the loader matches one of excluded_loaders.

        Raises
        ------
        ModuleNotFoundError
            If no other finder on sys.meta_path returns a spec for "fullname".
        """
        for finder in sys.meta_path:
            # Skip ourselves; otherwise this would recurse forever.
            if finder is not cls:
                spec = finder.find_spec(fullname, path, target)  # pyright: ignore
                if spec is not None:
                    break
        else:
            # The loop finished without a break: nobody could find the module.
            msg = f"No module named {fullname!r}"
            raise ModuleNotFoundError(msg, name=fullname)

        if (
            fullname not in cls.excluded_modules
            and spec.loader is not None
            and not cls._is_excluded_loader(spec.loader)
        ):
            spec.loader = importlib.util.LazyLoader(spec.loader)

        return spec
class _LazyFinderContext:
    """Temporarily "lazify" some types of import statements in the runtime context.

    On entry, _LazyFinder is placed at the front of sys.meta_path (if not already
    present); on exit of the *outermost* active block it is removed again. Nested
    ``with`` blocks are supported: an inner exit leaves the finder installed so the
    remainder of the enclosing block stays lazy.
    """

    # Number of currently active ``with`` blocks. Kept on the class because the
    # resource being managed (sys.meta_path) is shared by every instance.
    _active_count: int = 0

    def __enter__(self) -> None:
        _LazyFinderContext._active_count += 1
        if _LazyFinder not in sys.meta_path:
            sys.meta_path.insert(0, _LazyFinder)

    def __exit__(self, *exc_info: object) -> None:
        # Clamp at zero so an unmatched __exit__ cannot corrupt later nesting.
        remaining = max(_LazyFinderContext._active_count - 1, 0)
        _LazyFinderContext._active_count = remaining
        if remaining:
            # An enclosing block is still active; leave the finder in place.
            return

        try:
            sys.meta_path.remove(_LazyFinder)
        except ValueError:
            # Potentially put a warning here about sys.meta_path being modified from under
            # the context manager's feet.
            pass
# Module-level context manager instance used by the demonstrations in this file.
lazy_finder = _LazyFinderContext()

# While the block is active, _LazyFinder sits at the front of sys.meta_path, so this
# import statement is routed through _LazyFinder.find_spec.
with lazy_finder:
    import typing
# NOTE: Issues with LazyLoader, 1. | |
# To demonstrate, look at the import time results for this file with
# 'python -X importtime -c "import lazyfinder"' before and after uncommenting the two | |
# lines below. | |
# with lazy_finder: | |
# import typing |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment