Last active
August 9, 2024 17:25
-
-
Save alexpovel/df0f4922b973b820f15e357ea951abdc to your computer and use it in GitHub Desktop.
Extract the path of git URLs (ssh, git@, https) in a file system-friendly representation.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Extract the path of git URLs in a file system-friendly representation. | |
For example: | |
- `git@example.com:user/some/nested/repo.git` -> `user/some/nested/repo` | |
- `https://example.com/user/repo` -> `user/repo` | |
The output can be used to `git clone` into directories corresponding to the server path | |
automatically, e.g. `git clone $1 $(this-script $1)`, where $1 is some git URL. | |
This script handles nested repos (more path elements than just `owner/repo.git`), which | |
https://pypi.org/project/giturlparse/ doesn't. | |
""" | |
import argparse | |
import sys | |
from pathlib import PurePath, PurePosixPath | |
from urllib.parse import ParseResult, urlparse | |
def extract_path(raw_url: str) -> str: | |
"""Accepts a raw git URL and extracts its file-system friendly path.""" | |
url = urlparse(raw_url) | |
found_path = None | |
match url: | |
case ParseResult(scheme="https" | "ssh", path=str(path)): | |
# These schemes have native concepts of paths, so just reuse. | |
found_path = path | |
case ParseResult(scheme="", path=str(path)) if path.startswith("git@"): | |
# The git-specific format, | |
# https://git-scm.com/book/en/v2/Git-on-the-Server-The-Protocols | |
_username, path = path.split("@") | |
_host, path = path.split(":") | |
found_path = path | |
case _: | |
pass | |
if found_path is not None: | |
return str(strip_suffix(force_relative(PurePath(found_path)))) | |
raise ValueError(f"Cannot process {raw_url=} (parsed as {url=}). Invalid format?") | |
def force_relative[P: PurePath](path: P) -> P: | |
if path.is_absolute(): | |
path = path.relative_to(PurePosixPath("/").root) | |
return path | |
def strip_suffix[P: PurePath](path: P) -> P: | |
return path.with_suffix("") | |
def test():
    """Performs self-testing.

    Runs `extract_path` against a table of URLs. Each entry maps a URL to
    either the expected result string or an exception instance whose exact
    type `extract_path` is expected to raise.

    Raises:
        AssertionError: If a returned value deviates from the expectation.
        Exception: Any exception from `extract_path` whose type differs from
            the expected one is re-raised unchanged.
    """
    urls_to_result = {
        "git": ValueError(),  # Not a valid URL
        "git@": ValueError(),  # Missing stuff
        "git@example.com": ValueError(),  # Missing user etc.
        "git@example.com:repo": "repo",
        "git@example.com:owner/repo": "owner/repo",
        "git@example.com:owner/repo.git": "owner/repo",
        "git@example.com:owner/deeper/repo": "owner/deeper/repo",
        "git@example.com:owner/deeper/repo.git": "owner/deeper/repo",
        "ssh://git@example.com/repo": "repo",
        "ssh://git@example.com/owner/repo": "owner/repo",
        "ssh://git@example.com/owner/repo.git": "owner/repo",
        "ssh://git@example.com/owner/deeper/repo": "owner/deeper/repo",
        "ssh://git@example.com/owner/deeper/repo.git": "owner/deeper/repo",
        "https://example.com/repo": "repo",
        "https://example.com/owner/repo": "owner/repo",
        "https://example.com/owner/repo.git": "owner/repo",
        "https://example.com/owner/deeper/repo": "owner/deeper/repo",
        "https://example.com/owner/deeper/repo.git": "owner/deeper/repo",
    }

    for url, expected in urls_to_result.items():
        try:
            res = extract_path(url)
        except Exception as e:
            # An exception is only acceptable if exactly that type was
            # expected for this URL.
            if type(e) is not type(expected):
                raise
        else:
            # Raise explicitly instead of using `assert`, which is stripped
            # when Python runs with `-O` and would make this check a no-op.
            if res != expected:
                raise AssertionError(f"{res=} and {expected=} deviate")

    # Print our own file name so if this output occurs in some intermingled stderr log,
    # its origin is clear.
    print(f"{__file__}: Self-tests OK", file=sys.stderr)
def main():
    """Command-line entry point.

    Parses the arguments, runs the self-tests unless `--no-test` was given,
    and prints the extracted path of the given git URL to stdout.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("git_url")
    cli.add_argument("--no-test", action="store_true", help="Skip running unit tests")
    options = cli.parse_args()

    run_self_tests = not options.no_test
    if run_self_tests:
        test()

    extracted = extract_path(options.git_url)
    print(extracted)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packaging metadata for the script.
[project]
name = "git-url-path-extract.py"
version = "0.1.0"
# The script uses type-parameter syntax on functions (`def f[P: ...]`),
# which needs Python 3.12.
requires-python = ">=3.12"

# Build with setuptools.
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment