@mrcljx
Created September 19, 2024 15:39

Poetry (1.3) to UV Migration Script
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "deepdiff",
#     "editorconfig",
#     "pathspec",
#     "poetry-core",
#     "rich",
#     "tomlkit",
#     "typer",
#     "typing-extensions",
# ]
# ///
"""
MIT License
Copyright (c) 2024 Marcel Jackwerth
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
---
This script does not claim to be complete. Please tweak it to your needs
through trial and error. It was originally used to migrate a monorepo that
was on Poetry 1.3.
Usage:
`uv run --no-project migrate_to_uv.py`
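Options (see `main` below): pass a file or directory as the PATH argument;
`--sync`, `--delete`, `--no-recurse`, and `--no-lock` toggle the individual
steps, and `-v`/`--verbose` (repeatable) increases log output.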
"""
from __future__ import annotations
import contextlib
import dataclasses
import functools
import logging
import os
import pathlib
import shlex
import subprocess
import sys
import tempfile
from collections.abc import Iterator, Mapping, Sequence
from pathlib import Path
from subprocess import check_call, check_output
from typing import Annotated, TypedDict
import deepdiff
import editorconfig
import tomlkit.items
import typer
from pathspec import PathSpec, Pattern
from pathspec.patterns.gitwildmatch import GitWildMatchPattern
from poetry.core.constraints.version import Version, VersionConstraint
from poetry.core.constraints.version.parser import parse_constraint, parse_marker_version_constraint
from poetry.core.packages.utils.utils import create_nested_marker
from typing_extensions import Self
_LOGGER = logging.getLogger(__name__)
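
# A Poetry project rooted at one pyproject.toml, plus the lock-file paths next to it.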
@dataclasses.dataclass(frozen=True, kw_only=True)
class Project:
    pyproject_path: Path

    @functools.cached_property
    def root(self) -> Path:
        return self.pyproject_path.parent

    @functools.cached_property
    def poetry_lock_path(self) -> Path:
        return self.root / "poetry.lock"

    @functools.cached_property
    def uv_lock_path(self) -> Path:
        return self.root / "uv.lock"
class Meta(TypedDict, total=False):
    version: str
    extras: list[str]
    path: str
    git: str
    rev: str
    # also consulted by Dependency.source and Dependency.markers below
    develop: bool
    python: str
    markers: str
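
# One entry from a Poetry dependency table, rendered as a PEP 508-style requirement
# string plus an optional [tool.uv.sources] entry.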
@dataclasses.dataclass(frozen=True, kw_only=True)
class Dependency:
    name: str
    _meta: Meta
    _version_constraint: VersionConstraint | None

    @classmethod
    def parse(cls, name: str, meta: Meta | str) -> Self:
        if isinstance(meta, str):
            version = meta
            meta = {"version": meta}
        else:
            version = meta.get("version")
        return cls(
            name=name,
            _meta=meta,
            _version_constraint=parse_constraint(version) if version else None,
        )

    @functools.cached_property
    def path(self) -> str | None:
        return self._meta.get("path")

    @functools.cached_property
    def git(self) -> str | None:
        if not (repo := self._meta.get("git")):
            return None
        if rev := self._meta.get("rev"):
            return f"{repo}@{rev}"
        return repo

    @functools.cached_property
    def constraint(self) -> str:
        if not self._version_constraint:
            return ""
        elif self._version_constraint.is_any():
            return ""
        elif isinstance(self._version_constraint, Version):
            return f" =={self._version_constraint}"
        else:
            return f" {self._version_constraint}"

    @functools.cached_property
    def name_with_extras(self) -> str:
        lhs = self.name
        if extras := self._meta.get("extras"):
            lhs += f"[{','.join(extras)}]"
        return lhs

    @functools.cached_property
    def source(self) -> Mapping[str, object] | None:
        rv: dict[str, object] = {}
        if self.path:
            rv["path"] = self.path
            if self._meta.get("develop"):
                rv["editable"] = True
        elif git := self._meta.get("git"):
            rv["git"] = git
            if rev := self._meta.get("rev"):
                rv["rev"] = rev
        return rv or None

    @functools.cached_property
    def markers(self) -> str | None:
        if markers := self._meta.get("markers"):
            return markers
        if python_str := self._meta.get("python"):
            constraint = parse_constraint(python_str)
            marker = create_nested_marker("python_version", constraint)
            return str(marker)
        return None

    def __str__(self) -> str:
        rv = self.name_with_extras
        if not self.path and not self.git:
            rv += self.constraint
        if markers := self.markers:
            rv += f"; {markers}"
        return rv
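
# Thin wrapper around subprocess calls and directory changes, carrying CLI verbosity.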
@dataclasses.dataclass(frozen=True, kw_only=True)
class Context:
    verbose: int

    def run(self, cmd: Sequence[str | Path]) -> None:
        if self.verbose and cmd[0] == "uv":
            verbose_arg = "v" * self.verbose
            cmd = [cmd[0], f"-{verbose_arg}", *cmd[1:]]
        cmd_str = shlex.join(map(str, cmd))
        _LOGGER.info("$ %s", cmd_str)
        try:
            check_call(cmd)
        except subprocess.CalledProcessError:
            sys.exit(f"Failed to run ({os.getcwd()}): {cmd_str}")

    @contextlib.contextmanager
    def chdir(self, new: Path) -> Iterator[None]:
        old = Path.cwd()
        _LOGGER.debug("chdir: %s", new)
        os.chdir(new)
        try:
            yield
        finally:
            _LOGGER.debug("chdir: %s", old)
            os.chdir(old)
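
# Look up the indentation style/size for a file via .editorconfig (default: 4 spaces).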
def get_indent(path: Path) -> str:
    try:
        props = editorconfig.get_properties(path)
    except editorconfig.EditorConfigError:
        props = {}
    style = props.get("indent_style", "space")
    size = int(props.get("indent_size", "4"))
    chars = {"space": " ", "tab": "\t"}
    return chars[style] * size
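
# Pass 1: ensure a [project] table exists and carries the Poetry name and version
# (or declares the version as dynamic), so uv can read the project metadata.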
def prepare_pyproject(ctx: Context, project: Project) -> None:
    doc_str = project.pyproject_path.read_text()
    doc = tomlkit.parse(doc_str)

    # HACK: tomlkit does not support prepending to the document
    if "project" not in doc:
        doc_str = "[project]\n\n" + doc_str
        doc = tomlkit.parse(doc_str)

    project_node = doc["project"]
    assert isinstance(project_node, dict)

    if not (tool_node := doc.get("tool")):
        return
    assert isinstance(tool_node, dict)

    if not (poetry_node := tool_node.get("poetry")):
        return
    assert isinstance(poetry_node, dict)

    project_node["name"] = poetry_node["name"]

    if "version" in project_node.get("dynamic", []):
        pass
    elif poetry_node.get("version", "0.0.0") != "0.0.0":
        project_node["version"] = poetry_node["version"]
    else:
        dynamic = project_node.setdefault("dynamic", tomlkit.array())
        if "version" not in dynamic:
            dynamic.append("version")

    _LOGGER.info("Writing to %s", project.pyproject_path)
    project.pyproject_path.write_text(tomlkit.dumps(doc))
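
# Pass 2: translate Poetry dependency tables into [project.dependencies],
# [project.optional-dependencies], [tool.uv] dev-dependencies, and [tool.uv.sources].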
def migrate_pyproject(ctx: Context, project: Project) -> None:
    indent = get_indent(project.pyproject_path)
    doc = tomlkit.parse(project.pyproject_path.read_text())

    project_node = doc["project"]
    assert isinstance(project_node, dict)

    if not (tool_node := doc.get("tool")):
        return
    assert isinstance(tool_node, dict)

    if not (poetry_node := tool_node.get("poetry")):
        return
    assert isinstance(poetry_node, dict)

    @functools.cache
    def ensure_uv_node() -> tomlkit.items.Table:
        return tool_node.setdefault("uv", tomlkit.table())

    arrays_to_close = list[tomlkit.items.Array]()

    @functools.cache
    def ensure_deps_node() -> tomlkit.items.Array:
        rv = project_node.setdefault("dependencies", tomlkit.array())
        rv.clear()
        arrays_to_close.append(rv)
        return rv

    @functools.cache
    def ensure_sources_node() -> tomlkit.items.Table:
        rv = ensure_uv_node().setdefault("sources", tomlkit.table())
        rv.clear()
        return rv

    def add_dep(target: tomlkit.items.Array, dep: Dependency) -> None:
        if dep.name == "python":
            if requires_python := dep._version_constraint:
                project_node["requires-python"] = str(requires_python)
            return
        target.add_line(str(dep), indent=indent)
        if source := dep.source:
            source_table = ensure_sources_node()[dep.name] = tomlkit.inline_table()
            source_table.update(source)

    poetry_deps_node = poetry_node.get("dependencies", tomlkit.table())
    assert isinstance(poetry_deps_node, tomlkit.items.Table)
    for name, meta in poetry_deps_node.items():
        dep = Dependency.parse(name, meta)
        add_dep(ensure_deps_node(), dep)

    @functools.cache
    def ensure_dev_deps() -> tomlkit.items.Array:
        rv = ensure_uv_node().setdefault("dev-dependencies", tomlkit.array())
        rv.clear()
        arrays_to_close.append(rv)
        return rv

    @functools.cache
    def ensure_optional_deps() -> tomlkit.items.Table:
        rv = project_node.setdefault("optional-dependencies", tomlkit.table())
        rv.clear()
        return rv

    # legacy
    poetry_dev_deps_node = poetry_node.get("dev-dependencies", tomlkit.table())
    assert isinstance(poetry_dev_deps_node, tomlkit.items.Table)
    for name, meta in poetry_dev_deps_node.items():
        dep = Dependency.parse(name, meta)
        add_dep(ensure_dev_deps(), dep)

    # modern
    poetry_group_node = poetry_node.get("group", tomlkit.table())
    assert isinstance(poetry_group_node, tomlkit.items.Table)
    for group_name, group in poetry_group_node.items():
        group_deps = group.get("dependencies", tomlkit.table())
        assert isinstance(group_deps, tomlkit.items.Table)
        deps = [Dependency.parse(name, meta) for name, meta in group_deps.items()]
        if group_name == "dev":
            for dep in deps:
                add_dep(ensure_dev_deps(), dep)
        else:
            extra_node = ensure_optional_deps().setdefault(group_name, tomlkit.array())
            arrays_to_close.append(extra_node)
            for dep in deps:
                add_dep(extra_node, dep)

    for a in arrays_to_close:
        a.add_line(indent="")

    _LOGGER.info("Writing to %s", project.pyproject_path)
    project.pyproject_path.write_text(tomlkit.dumps(doc))
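
# Pass 3: seed uv's resolution from poetry.lock by temporarily pinning every locked
# package via [tool.uv] constraint-dependencies, locking, then relaxing and re-locking.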
def migrate_poetry_lock(ctx: Context, project: Project) -> None:
    doc = tomlkit.parse(project.poetry_lock_path.read_text())
    out_doc = tomlkit.parse(project.pyproject_path.read_text())

    packages = doc["package"]
    assert isinstance(packages, tomlkit.items.AoT)

    arrays_to_close = list[tomlkit.items.Array]()

    @functools.cache
    def ensure_dep_node() -> tomlkit.items.Array:
        uv_node = out_doc.setdefault("tool", tomlkit.table()).setdefault("uv", tomlkit.table())
        rv = uv_node.setdefault("constraint-dependencies", tomlkit.array())
        rv.clear()
        arrays_to_close.append(rv)
        return rv

    for package in packages:
        assert isinstance(package, dict)
        line = f"{package['name']} =={package['version']}"
        constraint = package.get("python-versions", "*")
        if constraint != "*":
            parsed = parse_marker_version_constraint(constraint)
            marker = create_nested_marker("python_version", parsed)
            line += f"; {marker}"
        ensure_dep_node().add_line(line, indent=get_indent(project.pyproject_path))

    for a in arrays_to_close:
        a.add_line(indent="")

    _LOGGER.info("Writing to %s", project.pyproject_path)
    project.pyproject_path.write_text(tomlkit.dumps(out_doc))

    project.uv_lock_path.unlink(missing_ok=True)
    ctx.run(["uv", "lock"])

    # remove the constraints again
    if tool_node := out_doc.get("tool"):
        assert isinstance(tool_node, dict)
        if uv_node := tool_node.get("uv"):
            assert isinstance(uv_node, dict)
            uv_node.pop("constraint-dependencies", None)
    project.pyproject_path.write_text(tomlkit.dumps(out_doc))

    # re-lock without the constraints
    ctx.run(["uv", "lock"])

    # For some projects, `resolution-markers` becomes a mess, so remove it and lock again
    lock_doc = tomlkit.parse(project.uv_lock_path.read_text())
    lock_doc.pop("resolution-markers", None)
    project.uv_lock_path.write_text(tomlkit.dumps(lock_doc))

    # re-lock without the markers
    ctx.run(["uv", "lock"])
def check_uv_lock(ctx: Context, project: Project) -> None:
    poetry_lock = tomlkit.parse(project.poetry_lock_path.read_text())
    uv_lock = tomlkit.parse(project.uv_lock_path.read_text())

    poetry_package_node = poetry_lock["package"]
    assert isinstance(poetry_package_node, tomlkit.items.AoT)
    poetry_packages = {package["name"]: package["version"] for package in poetry_package_node}

    uv_package_node = uv_lock["package"]
    assert isinstance(uv_package_node, tomlkit.items.AoT)
    uv_packages = {package["name"]: package["version"] for package in uv_package_node}

    print(deepdiff.DeepDiff(poetry_packages, uv_packages).pretty())
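
# Helper around `uv pip compile` with a constraints file; not called from `main`.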
def uv_pip_compile(
    ctx: Context, pyproject_path: Path, constraints_path: Path, output_path: Path
) -> None:
    ctx.run(
        [
            "uv",
            "pip",
            "compile",
            "--universal",
            "--no-header",
            "--no-annotate",
            "--constraint",
            constraints_path,
            "--output-file",
            output_path,
            pyproject_path,
        ],
    )
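
# Snapshot the environment, run `uv sync --all-extras`, snapshot again, and print
# a `diff` command so the environment change can be reviewed.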
def uv_pip_freeze(output_path: Path) -> None:
    rv = check_output(["uv", "pip", "freeze"])
    output_path.write_bytes(rv)


def uv_sync(ctx: Context) -> None:
    # mkdtemp keeps the directory around so the freeze files survive for the diff
    tmp_path = pathlib.Path(tempfile.mkdtemp())
    before_path = tmp_path / "requirements-before.txt"
    after_path = tmp_path / "requirements-after.txt"
    uv_pip_freeze(before_path)
    ctx.run(["uv", "sync", "--all-extras"])
    uv_pip_freeze(after_path)
    print("Done. Check the diff:")
    print("$ diff", before_path, after_path)
def delete_poetry_config(ctx: Context, project: Project) -> None:
    project.poetry_lock_path.unlink(missing_ok=True)

    doc = tomlkit.parse(project.pyproject_path.read_text())
    if build_system_node := doc.get("build-system"):
        if build_system_node.get("build-backend", "").startswith("poetry."):
            doc.remove("build-system")
    if tool_node := doc.get("tool"):
        assert isinstance(tool_node, dict)
        with contextlib.suppress(KeyError):
            tool_node.pop("poetry")
    project.pyproject_path.write_text(tomlkit.dumps(doc))
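
# Gitignore-aware directory walking, used to discover pyproject.toml files in a monorepo.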
def build_path_spec(root: Path) -> PathSpec:
    patterns = list[Pattern]()
    patterns.extend(map(GitWildMatchPattern, [".git", "node_modules"]))
    patterns.extend(Walker._gitignore_patterns(root))
    return PathSpec(patterns=patterns)


@dataclasses.dataclass(frozen=True, kw_only=True)
class Walker:
    root: Path
    ignore: PathSpec
    search: PathSpec

    def __iter__(self) -> Iterator[Path]:
        yield from self._walk(self.root)

    def _walk(self, current: Path) -> Iterator[Path]:
        for entry in os.scandir(current):
            path = Path(entry.path)
            relative_path = path.relative_to(self.root)
            if self.ignore.match_file(relative_path):
                continue
            if self.search.match_file(relative_path):
                yield path
            if entry.is_dir():
                yield from self._walk_into(path)

    def _walk_into(self, path: Path) -> Iterator[Path]:
        new_patterns = list(self._gitignore_patterns(path))
        if not new_patterns:
            return self._walk(path)
        _LOGGER.debug("Found new .gitignore patterns: %s", new_patterns)
        ignore = PathSpec(patterns=[*self.ignore.patterns, *new_patterns])
        walker = dataclasses.replace(self, ignore=ignore)
        return walker._walk(path)

    @staticmethod
    def _gitignore_patterns(root: Path) -> Iterator[Pattern]:
        gitignore_path = root / ".gitignore"
        if gitignore_path.exists():
            for line in gitignore_path.read_text().splitlines():
                yield GitWildMatchPattern(line)
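
# Yield a Project for an explicit pyproject.toml, for the given directory, or for every
# pyproject.toml found below it when recursing.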
def find_projects(path: Path, *, recurse: bool) -> Iterator[Project]:
    if path.is_file():
        yield Project(pyproject_path=path)
        return
    assert path.is_dir()
    if not recurse:
        yield Project(pyproject_path=path / "pyproject.toml")
        return
    ignore = build_path_spec(path)
    search = PathSpec(patterns=[GitWildMatchPattern("pyproject.toml")])
    for pyproject_path in Walker(root=path, ignore=ignore, search=search):
        yield Project(pyproject_path=pyproject_path)
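
# Alternative path-handling helper; `main` uses process_path below instead.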
def resolve_path(path: Path | None) -> tuple[Path | None, Path]:
    if not path:
        return path, Path.cwd()
    path = Path.cwd() / path
    assert path.exists()
    if path.is_dir():
        root = path
        path = None
    else:
        root = path.parent
    os.chdir(root)
    return path, root


def process_path(path: Path | None) -> Path:
    if not path:
        return Path.cwd()
    elif path.is_file():
        os.chdir(path.parent)
        return path.relative_to(path.parent)
    elif path.is_dir():
        os.chdir(path)
        return Path(".")
    else:
        raise ValueError(f"Invalid path: {path}")
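
# CLI entry point: prepare every project first, then migrate dependencies, then
# optionally lock, sync, and strip the Poetry configuration.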
def main(
    path: Annotated[Path | None, typer.Argument()] = None,
    *,
    verbose: Annotated[int, typer.Option("--verbose", "-v", count=True)] = 0,
    sync: bool = False,
    delete: bool = False,
    recurse: bool = True,
    lock: bool = True,
) -> None:
    logging.basicConfig(
        level={0: logging.INFO}.get(verbose, logging.DEBUG),
        format="%(message)s",
    )

    path = process_path(path)
    ctx = Context(verbose=verbose)
    projects = list(find_projects(path, recurse=recurse))

    for project in projects:
        _LOGGER.info("Preparing: %s", project.root)
        with ctx.chdir(project.root):
            prepare_pyproject(ctx, project)

    for project in projects:
        _LOGGER.info("Processing: %s", project.root)
        with ctx.chdir(project.root):
            migrate_pyproject(ctx, project)

    if lock:
        for project in projects:
            if project.poetry_lock_path.exists():
                _LOGGER.info("Processing: %s", project.root)
                with ctx.chdir(project.root):
                    migrate_poetry_lock(ctx, project)
                    check_uv_lock(ctx, project)

    if sync:
        for project in projects:
            if project.poetry_lock_path.exists():
                _LOGGER.info("Syncing: %s", project.root)
                with ctx.chdir(project.root):
                    uv_sync(ctx)
                _LOGGER.info("Stopping")
                break

    if delete:
        for project in projects:
            _LOGGER.info("Stripping: %s", project)
            delete_poetry_config(ctx, project)


if __name__ == "__main__":
    typer.run(main)