Skip to content

Instantly share code, notes, and snippets.

@bukzor
Last active July 29, 2024 19:42
Show Gist options
  • Save bukzor/c61f8d0ed5a6968ea0356860bc4971e5 to your computer and use it in GitHub Desktop.
Save bukzor/c61f8d0ed5a6968ea0356860bc4971e5 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
This script demonstrates a problem in `urllib.parse.urlunsplit` by implementing a fixed version and comparing its results to the original's.
See `DEMO`, below, for results.
TL;DR: It's currently impossible to obtain relative URIs (like `file:selfie.png`) from urlunsplit, but we could fix it.
"""
from urllib.parse import urlsplit, urlunsplit
SplitResult = tuple[str, str, str, str, str]
def urlunsplit_new(components: SplitResult, strict: bool = False) -> str:
    """Combine the five parts of a split URL back into one URL string.

    The ``//`` authority marker is written only when there is a netloc, or
    when the path itself begins with ``//`` (the marker is then required so
    that the leading path slashes are not re-read as a netloc).  A scheme
    followed by a relative path, such as ``file:x``, is therefore written
    back unchanged rather than being coerced to an absolute path.

    Args:
        components: ``(scheme, netloc, path, query, fragment)`` strings.
        strict: If true, refuse to combine a netloc with a non-empty relative
            path instead of silently prefixing the path with ``/``.

    Returns:
        The recombined URL.

    Raises:
        ValueError: If ``strict`` is true and a netloc is paired with a
            non-empty path that does not start with ``/``.
    """
    scheme, netloc, path, query, fragment = components

    url = [scheme, ":"] if scheme else []

    if netloc or path.startswith("//"):
        url.extend(("//", netloc))
        # This can only trigger when netloc is non-empty: a path that starts
        # with "//" necessarily starts with "/" as well.
        if path and not path.startswith("/"):
            if strict:
                raise ValueError("URI can't represent netloc with relative path")
            url.append("/")  # status quo: path quietly coerced to absolute
    url.append(path)

    if query:
        url.extend(("?", query))
    if fragment:
        url.extend(("#", fragment))

    return "".join(url)
def test(scheme: str, path: str, slash_count: int):
    """Round-trip one URL through both unsplit functions and report differences.

    The URL is built as ``scheme``, then ``slash_count`` slashes, then
    ``path``.  It is split once, recombined with both ``urlunsplit`` and
    ``urlunsplit_new``, and each result is split again.  Nothing is printed
    when the two re-split results agree; otherwise a three-line report plus a
    blank line is printed, labelling each implementation by whether its
    round trip reproduced the first split.
    """
    original_url = "".join((scheme, "/" * slash_count, path))
    parts = urlsplit(original_url)

    rebuilt_old = urlunsplit(parts)
    rebuilt_new = urlunsplit_new(parts)
    reparsed_old = urlsplit(rebuilt_old)
    reparsed_new = urlsplit(rebuilt_new)

    # Only cases where the two implementations disagree are worth showing.
    if reparsed_old == reparsed_new:
        return

    if reparsed_new == parts:
        verdict_old, verdict_new = "wrong", "fixed"
    elif reparsed_old == parts:
        verdict_old, verdict_new = "okay!", "derp!"
    else:
        # Neither implementation round-trips to the first split.
        verdict_old, verdict_new = "WRONG!", "BORKD!"

    print(repr(original_url), "-split->", parts, "-unsplit->", "...")
    print(f"(old, {verdict_old})", repr(rebuilt_old), "-split->", reparsed_old)
    print(f"(new, {verdict_new})", repr(rebuilt_new), "-split->", reparsed_new)
    print()
def main():
    """Run ``test`` over every scheme / path / slash-count combination.

    Schemes are ``""`` and ``"file:"``, paths are ``""`` and ``"x"``, and the
    number of separating slashes runs from 0 through 4.
    """
    cases = [
        (scheme, path, slash_count)
        for scheme in ("", "file:")
        for path in ("", "x")
        for slash_count in range(5)
    ]
    for case in cases:
        test(*case)
# Recorded sample output of this script, kept for reference (the module
# docstring points here).  Nothing in the visible code reads this constant.
# Each group shows the input URL and its split, followed by what the old and
# new unsplit functions produced and how each result splits again.
DEMO = """
$ python3 demo.py
'////' -split-> SplitResult(scheme='', netloc='', path='//', query='', fragment='') -unsplit-> ...
(old, wrong) '//' -split-> SplitResult(scheme='', netloc='', path='', query='', fragment='')
(new, fixed) '////' -split-> SplitResult(scheme='', netloc='', path='//', query='', fragment='')
'////x' -split-> SplitResult(scheme='', netloc='', path='//x', query='', fragment='') -unsplit-> ...
(old, wrong) '//x' -split-> SplitResult(scheme='', netloc='x', path='', query='', fragment='')
(new, fixed) '////x' -split-> SplitResult(scheme='', netloc='', path='//x', query='', fragment='')
'file:////' -split-> SplitResult(scheme='file', netloc='', path='//', query='', fragment='') -unsplit-> ...
(old, wrong) 'file://' -split-> SplitResult(scheme='file', netloc='', path='', query='', fragment='')
(new, fixed) 'file:////' -split-> SplitResult(scheme='file', netloc='', path='//', query='', fragment='')
'file:x' -split-> SplitResult(scheme='file', netloc='', path='x', query='', fragment='') -unsplit-> ...
(old, wrong) 'file:///x' -split-> SplitResult(scheme='file', netloc='', path='/x', query='', fragment='')
(new, fixed) 'file:x' -split-> SplitResult(scheme='file', netloc='', path='x', query='', fragment='')
'file:////x' -split-> SplitResult(scheme='file', netloc='', path='//x', query='', fragment='') -unsplit-> ...
(old, wrong) 'file://x' -split-> SplitResult(scheme='file', netloc='x', path='', query='', fragment='')
(new, fixed) 'file:////x' -split-> SplitResult(scheme='file', netloc='', path='//x', query='', fragment='')
"""
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment