Created
May 26, 2013 23:40
-
-
Save mitsuhiko/5654459 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Helpers for bytes handling | |
# For 3.2, we deliberately require applications that | |
# handle improperly quoted URLs to do their own | |
# decoding and encoding. If valid use cases are | |
# presented, we may relax this by using latin-1 | |
# decoding internally for 3.3 | |
_implicit_encoding = 'ascii' | |
_implicit_errors = 'strict' | |
def _noop(obj): | |
return obj | |
def _encode_result(obj, encoding=_implicit_encoding,
                   errors=_implicit_errors):
    """Return ``obj.encode(encoding, errors)``.

    The codec and the error handler default to the module-level
    implicit settings.
    """
    encoded = obj.encode(encoding, errors)
    return encoded
def _decode_args(args, encoding=_implicit_encoding,
                 errors=_implicit_errors):
    """Return a tuple with the decoded form of every item in *args*.

    Falsy items are not decoded; each one is replaced by the empty str.
    The codec and the error handler default to the module-level
    implicit settings.
    """
    decoded = []
    for item in args:
        decoded.append(item.decode(encoding, errors) if item else '')
    return tuple(decoded)
def _coerce_args(*args): | |
# Invokes decode if necessary to create str args | |
# and returns the coerced inputs along with | |
# an appropriate result coercion function | |
# - noop for str inputs | |
# - encoding function otherwise | |
str_input = isinstance(args[0], str) | |
for arg in args[1:]: | |
# We special-case the empty string to support the | |
# "scheme=''" default argument to some functions | |
if arg and isinstance(arg, str) != str_input: | |
raise TypeError("Cannot mix str and non-str arguments") | |
if str_input: | |
return args + (_noop,) | |
return _decode_args(args) + (_encode_result,) | |
# Result objects are more helpful than simple tuples
class _ResultMixinStr(object): | |
"""Standard approach to encoding parsed results from str to bytes""" | |
__slots__ = () | |
def encode(self, encoding='ascii', errors='strict'): | |
return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) | |
class _ResultMixinBytes(object): | |
"""Standard approach to decoding parsed results from bytes to str""" | |
__slots__ = () | |
def decode(self, encoding='ascii', errors='strict'): | |
return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) | |
class _NetlocResultMixinBase(object): | |
"""Shared methods for the parsed result objects containing a netloc element""" | |
__slots__ = () | |
@property | |
def username(self): | |
return self._userinfo[0] | |
@property | |
def password(self): | |
return self._userinfo[1] | |
@property | |
def hostname(self): | |
hostname = self._hostinfo[0] | |
if not hostname: | |
hostname = None | |
elif hostname is not None: | |
hostname = hostname.lower() | |
return hostname | |
@property | |
def port(self): | |
port = self._hostinfo[1] | |
if port is not None: | |
port = int(port, 10) | |
# Return None on an illegal port | |
if not ( 0 <= port <= 65535): | |
return None | |
return port | |
class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    """Netloc splitting for results whose ``netloc`` is a str."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return ``(username, password)`` taken from before the last '@'.

        Both are None when the netloc has no '@'; the password alone is
        None when the user info has no ':'.
        """
        credentials, at_sign, _ = self.netloc.rpartition('@')
        if not at_sign:
            return None, None
        user, colon, secret = credentials.partition(':')
        return user, (secret if colon else None)

    @property
    def _hostinfo(self):
        """Return ``(hostname, port)`` taken from after the last '@'.

        A host enclosed in '[' ... ']' is returned without the brackets.
        The port is the text following the ':' separator, or None when
        there is no such separator.
        """
        host_port = self.netloc.rpartition('@')[2]
        _, open_bracket, after_bracket = host_port.partition('[')
        if open_bracket:
            # Bracketed host: the port, if any, follows the closing bracket.
            host, _, trailer = after_bracket.partition(']')
            _, colon, port = trailer.partition(':')
        else:
            host, colon, port = host_port.partition(':')
        return host, (port if colon else None)
class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    """Netloc splitting for results whose ``netloc`` is a bytes object."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return ``(username, password)`` taken from before the last b'@'.

        Both are None when the netloc has no b'@'; the password alone is
        None when the user info has no b':'.
        """
        credentials, at_sign, _ = self.netloc.rpartition(b'@')
        if not at_sign:
            return None, None
        user, colon, secret = credentials.partition(b':')
        return user, (secret if colon else None)

    @property
    def _hostinfo(self):
        """Return ``(hostname, port)`` taken from after the last b'@'.

        A host enclosed in b'[' ... b']' is returned without the brackets.
        The port is the bytes following the b':' separator, or None when
        there is no such separator.
        """
        host_port = self.netloc.rpartition(b'@')[2]
        _, open_bracket, after_bracket = host_port.partition(b'[')
        if open_bracket:
            # Bracketed host: the port, if any, follows the closing bracket.
            host, _, trailer = after_bracket.partition(b']')
            _, colon, port = trailer.partition(b':')
        else:
            host, colon, port = host_port.partition(b':')
        return host, (port if colon else None)
from collections import namedtuple | |
_DefragResultBase = namedtuple('DefragResult', 'url fragment') | |
_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment') | |
_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment') | |
# Backwards-compatibility alias for _NetlocResultMixinStr.
# ResultBase has been dropped from the documented API; it is kept
# around because formally deprecating it isn't worth the hassle.
ResultBase = _NetlocResultMixinStr
# Structured result objects for string data
class DefragResult(_DefragResultBase, _ResultMixinStr):
    """``(url, fragment)`` result holding str data."""
    __slots__ = ()

    def geturl(self):
        """Return the URL, with '#' and the fragment appended if non-empty."""
        fragment = self.fragment
        if not fragment:
            return self.url
        return self.url + '#' + fragment
class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    """``(scheme, netloc, path, query, fragment)`` result holding str data."""
    __slots__ = ()

    def geturl(self):
        """Return the value of ``urlunsplit`` applied to this result."""
        rebuilt = urlunsplit(self)
        return rebuilt
class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    """``(scheme, netloc, path, params, query, fragment)`` result holding str data."""
    __slots__ = ()

    def geturl(self):
        """Return the value of ``urlunparse`` applied to this result."""
        rebuilt = urlunparse(self)
        return rebuilt
# Structured result objects for bytes data
class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    """``(url, fragment)`` result holding bytes data."""
    __slots__ = ()

    def geturl(self):
        """Return the URL, with b'#' and the fragment appended if non-empty."""
        fragment = self.fragment
        if not fragment:
            return self.url
        return self.url + b'#' + fragment
class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    """``(scheme, netloc, path, query, fragment)`` result holding bytes data."""
    __slots__ = ()

    def geturl(self):
        """Return the value of ``urlunsplit`` applied to this result."""
        rebuilt = urlunsplit(self)
        return rebuilt
class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    """``(scheme, netloc, path, params, query, fragment)`` result holding bytes data."""
    __slots__ = ()

    def geturl(self):
        """Return the value of ``urlunparse`` applied to this result."""
        rebuilt = urlunparse(self)
        return rebuilt
# Set up the encode/decode result pairs
def _fix_result_transcoding():
    """Link every str result class with its bytes twin, in both directions.

    The str class receives ``_encoded_counterpart`` and the bytes class
    receives ``_decoded_counterpart``.
    """
    str_classes = (DefragResult, SplitResult, ParseResult)
    bytes_classes = (DefragResultBytes, SplitResultBytes, ParseResultBytes)
    for text_cls, binary_cls in zip(str_classes, bytes_classes):
        text_cls._encoded_counterpart = binary_cls
        binary_cls._decoded_counterpart = text_cls


# Run the wiring once, then drop the helper from the module namespace.
_fix_result_transcoding()
del _fix_result_transcoding
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment