Skip to content

Instantly share code, notes, and snippets.

@alexpovel
Last active August 9, 2024 17:25
Show Gist options
  • Save alexpovel/df0f4922b973b820f15e357ea951abdc to your computer and use it in GitHub Desktop.
Save alexpovel/df0f4922b973b820f15e357ea951abdc to your computer and use it in GitHub Desktop.
Extract the path of git URLs (ssh, git@, https) in a file system-friendly representation.
"""Extract the path of git URLs in a file system-friendly representation.
For example:
- `git@example.com:user/some/nested/repo.git` -> `user/some/nested/repo`
- `https://example.com/user/repo` -> `user/repo`
The output can be used to `git clone` into directories corresponding to the server path
automatically, e.g. `git clone $1 $(this-script $1)`, where $1 is some git URL.
This script handles nested repos (more path elements than just `owner/repo.git`), which
https://pypi.org/project/giturlparse/ doesn't.
"""
import argparse
import sys
from pathlib import PurePath, PurePosixPath
from urllib.parse import ParseResult, urlparse
def extract_path(raw_url: str) -> str:
"""Accepts a raw git URL and extracts its file-system friendly path."""
url = urlparse(raw_url)
found_path = None
match url:
case ParseResult(scheme="https" | "ssh", path=str(path)):
# These schemes have native concepts of paths, so just reuse.
found_path = path
case ParseResult(scheme="", path=str(path)) if path.startswith("git@"):
# The git-specific format,
# https://git-scm.com/book/en/v2/Git-on-the-Server-The-Protocols
_username, path = path.split("@")
_host, path = path.split(":")
found_path = path
case _:
pass
if found_path is not None:
return str(strip_suffix(force_relative(PurePath(found_path))))
raise ValueError(f"Cannot process {raw_url=} (parsed as {url=}). Invalid format?")
def force_relative[P: PurePath](path: P) -> P:
if path.is_absolute():
path = path.relative_to(PurePosixPath("/").root)
return path
def strip_suffix[P: PurePath](path: P) -> P:
return path.with_suffix("")
def test():
    """Perform self-testing of `extract_path`.

    Each sample URL maps to either the expected extracted path, or to an
    exception instance whose exact type `extract_path` is expected to raise.

    Raises:
        AssertionError: If a result deviates from its expectation (this
            includes an expected exception not having been raised).
        Exception: Any exception from `extract_path` whose type does not match
            the expectation is re-raised unchanged.
    """
    urls_to_result = {
        "git": ValueError(),  # Not a valid URL
        "git@": ValueError(),  # Missing stuff
        "git@example.com": ValueError(),  # Missing user etc.
        "git@example.com:repo": "repo",
        "git@example.com:owner/repo": "owner/repo",
        "git@example.com:owner/repo.git": "owner/repo",
        "git@example.com:owner/deeper/repo": "owner/deeper/repo",
        "git@example.com:owner/deeper/repo.git": "owner/deeper/repo",
        "ssh://git@example.com/repo": "repo",
        "ssh://git@example.com/owner/repo": "owner/repo",
        "ssh://git@example.com/owner/repo.git": "owner/repo",
        "ssh://git@example.com/owner/deeper/repo": "owner/deeper/repo",
        "ssh://git@example.com/owner/deeper/repo.git": "owner/deeper/repo",
        "https://example.com/repo": "repo",
        "https://example.com/owner/repo": "owner/repo",
        "https://example.com/owner/repo.git": "owner/repo",
        "https://example.com/owner/deeper/repo": "owner/deeper/repo",
        "https://example.com/owner/deeper/repo.git": "owner/deeper/repo",
    }

    for url, expected in urls_to_result.items():
        try:
            res = extract_path(url)
        except Exception as e:
            # Only an exception of exactly the expected type counts as a pass.
            if type(e) is not type(expected):
                raise
        else:
            # Raise explicitly instead of using `assert`, which is stripped
            # when Python runs with `-O` and would let failures pass silently.
            if res != expected:
                raise AssertionError(f"{res=} and {expected=} deviate")

    # Print our own file name so if this output occurs in some intermingled stderr log,
    # its origin is clear.
    print(f"{__file__}: Self-tests OK", file=sys.stderr)
def main():
    """Command-line entry point.

    Parses the positional ``git_url`` argument and the ``--no-test`` flag,
    runs the self-tests unless they were disabled, then prints the path
    extracted from ``git_url`` to standard output.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("git_url")
    cli.add_argument(
        "--no-test",
        action="store_true",
        help="Skip running unit tests",
    )
    options = cli.parse_args()

    run_self_tests = not options.no_test
    if run_self_tests:
        test()

    extracted = extract_path(options.git_url)
    print(extracted)


# Only run the CLI when executed as a script, not when imported.
if __name__ == "__main__":
    main()
[project]
name = "git-url-path-extract.py"
version = "0.1.0"
requires-python = ">=3.12"
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment