Last active
September 7, 2016 16:22
-
-
Save sxslex/8e9b3b190a84ed5c5c08893a27d01638 to your computer and use it in GitHub Desktop.
Decodifica um url em partes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
# da_urllib.py
# by [email protected]
import urllib | |
def urldencode(params):
    """Decode a URL query string into a dictionary.

    The string is split on ``&`` and each non-empty piece is split on its
    first ``=`` only, so ``=`` characters inside a value are preserved.
    Names and values are decoded with ``unquote_plus`` (``+`` becomes a
    space and ``%XX`` escapes are resolved).

    Args:
        params: The raw query string, without the leading ``?``.

    Returns:
        A dict keyed by parameter name. A parameter given without ``=``
        maps to ``None``; ``name=`` maps to ``''``. A name that occurs more
        than once maps to a list of its values in order of appearance.
    """
    # Imported locally so the function works on Python 3 (urllib.parse) as
    # well as on Python 2 (urllib) without touching the module imports.
    try:
        from urllib.parse import unquote_plus
    except ImportError:
        from urllib import unquote_plus

    resp = {}
    for item in params.split('&'):
        if not item:
            # Empty pieces come from '', a trailing '&' or '&&'.
            continue
        raw_key, has_value, raw_value = item.partition('=')
        key = unquote_plus(raw_key)
        value = unquote_plus(raw_value) if has_value else None
        if key not in resp:
            resp[key] = value
            continue
        # Repeated name: promote the stored scalar to a list, then append.
        if not isinstance(resp[key], list):
            resp[key] = [resp[key]]
        resp[key].append(value)
    return resp
def extract_url(url):
    """Split a URL into protocol, host, path, fragment and decoded query.

    Components are peeled off in this order: scheme, fragment, query, then
    host/path. Taking the fragment and query off first keeps a ``?`` or
    ``/`` that appears inside them from being mistaken for a delimiter.

    Args:
        url: The URL text. The scheme is optional.

    Returns:
        A dict with the keys:

        * ``protocol`` -- text before ``://``, or ``'http'`` when absent.
        * ``host`` -- text before the first ``/``; for a URL that has a
          scheme but no ``/``, everything up to the query/fragment.
          ``None`` when neither applies.
        * ``path`` -- text after the first ``/`` (without that slash), or
          ``None`` when there is no path.
        * ``fragment`` -- text after the first ``#``, or ``None``.
        * ``query`` -- the query string decoded by ``urldencode``.

        Input with no scheme and no ``/`` is treated as a bare query
        string, or as ``path?query`` when it contains a ``?``.
    """
    protocol = 'http'
    host = None
    path = None

    # Accept '://' as the scheme separator only when nothing belonging to a
    # later component precedes it; otherwise it is part of e.g. a query
    # value such as 'next=http://other'.
    scheme, scheme_sep, remainder = url.partition('://')
    has_scheme = bool(scheme_sep) and not any(ch in scheme for ch in '/?#')
    if has_scheme:
        protocol = scheme
        url = remainder

    # The fragment runs from the first '#' to the end of the URL.
    url, frag_sep, frag_text = url.partition('#')
    fragment = frag_text if frag_sep else None

    # The query runs from the first '?' up to the fragment.
    url, query_sep, query_text = url.partition('?')

    if '/' in url:
        host, path = url.split('/', 1)
    elif has_scheme:
        # 'http://host' or 'http://host?a=1': host only, no path.
        host = url
    elif query_sep:
        # 'page?a=1': no host can be identified, so keep the text as path.
        path = url
    else:
        # Neither scheme, '/' nor '?': the input is a bare query string.
        query_text = url

    return dict(
        protocol=protocol,
        host=host,
        path=path,
        fragment=fragment,
        query=urldencode(query_text)
    )
if __name__ == '__main__':
    # Manual demo: pretty-print the parsed form of a few sample inputs.
    import pprint

    # Bare query strings. Results recorded by the author:
    #   ''  -> {}
    #   'a=SleX&c=cc&b=Slex+%26+CIA&teste'
    #       -> {'a': 'SleX', 'b': 'Slex & CIA', 'c': 'cc', 'teste': None}
    for sample_query in ('', 'a=SleX&c=cc&b=Slex+%26+CIA&teste'):
        pprint.pprint((urldencode(sample_query)))

    sample_urls = (
        # URL without any '?'.
        # NOTE(review): the author recorded path=None with 'teste/aaa'
        # reported as a query key for this input; re-check against the
        # current extract_url.
        'http://www.slex.com.br/teste/aaa',
        # URL with a query string. Recorded result:
        # {'fragment': None,
        #  'host': 'www.slex.com.br',
        #  'path': 'teste/aaa',
        #  'protocol': 'http',
        #  'query': {'a': 'SleX', 'b': 'Slex & CIA', 'c': 'cc',
        #            'teste': None}}
        (
            'http://www.slex.com.br/teste/aaa?' +
            'a=SleX&c=cc&b=Slex+%26+CIA&teste'
        ),
        # URL with a repeated parameter and a fragment. Recorded result:
        # {'fragment': 'ancora',
        #  'host': 'www.slex.com.br',
        #  'path': 'teste/aaa',
        #  'protocol': 'http',
        #  'query': {'b': 'Slex & CIA',
        #            'c': 'cc',
        #            'nome': ['SleX', 'Alex'],
        #            'teste': None}}
        (
            'http://www.slex.com.br/teste/aaa?' +
            'nome=SleX&nome=Alex&c=cc&b=Slex+%26+CIA&teste#ancora'
        ),
    )
    for sample_url in sample_urls:
        pprint.pprint(extract_url(url=sample_url))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment