Created
November 5, 2020 03:53
-
-
Save mhasbini/9f2070256b46238ad1d82e27931bd07e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import httpy | |
# Example usage of httpy.get: the returned object exposes the response
# status as `status` and the response body as `data`.
req = httpy.get('http://httpbin.org/robots.txt')
req.status # => 200
req.data # => 'User-agent: *\nDisallow: /deny'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from parsers import parse_url, parse_response | |
from request_helper import get as raw_get | |
class Result:
    """Outcome of an HTTP request: a status value and the response data.

    Attributes are stored exactly as passed to the constructor:
        status: the status value of the response.
        data: the body of the response.
    """

    def __init__(self, status, data):
        self.status = status
        self.data = data

    def __repr__(self):
        # Readable form for debugging and interactive sessions instead of
        # the default "<Result object at 0x...>".
        return f'{type(self).__name__}(status={self.status!r}, data={self.data!r})'
def get(url):
    """Perform a GET request for ``url`` and wrap the outcome in a ``Result``.

    The URL is split by ``parse_url``, the pieces are passed to ``raw_get``,
    and the raw reply is decoded by ``parse_response``.
    """
    connection_args = parse_url(url)
    raw_reply = raw_get(*connection_args)
    parsed_reply = parse_response(raw_reply)
    return Result(*parsed_reply)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyparsing as pp | |
def parse_url(url):
    """Parse a URL of the following form:

        http://host[:port]path[?query][#fragment]

    Only the ``http`` scheme is accepted. The query string, if present, is
    kept as part of the returned path; the fragment is validated and dropped.

    Returns:
        A ``(host, port, path)`` tuple. ``port`` is an int and defaults to 80
        when the URL does not specify one.

    Raises:
        ValueError: If ``url`` does not match the format above, or if the
            port is outside the range 1-65535.

    >>> parse_url('http://httpbin.org/')
    ('httpbin.org', 80, '/')
    >>> parse_url('http://httpbin.org/robots.txt')
    ('httpbin.org', 80, '/robots.txt')
    >>> parse_url('http://test:1234/lorem?a=b#c')
    ('test', 1234, '/lorem?a=b')
    >>> parse_url('http://my-host.example:8080/a-b/c%20d?x=1#top')
    ('my-host.example', 8080, '/a-b/c%20d?x=1')
    >>> parse_url('https://mhasbini.com/')
    Traceback (most recent call last):
    ...
    ValueError: Invalid URL
    >>> parse_url('httpmhasbini.com')
    Traceback (most recent call last):
    ...
    ValueError: Invalid URL
    >>> parse_url('http://test:-80/')
    Traceback (most recent call last):
    ...
    ValueError: Invalid URL
    >>> parse_url('http://test:99999/')
    Traceback (most recent call last):
    ...
    ValueError: Invalid URL
    """
    # Characters allowed after the leading '/' (and inside the fragment):
    # RFC 3986 unreserved and sub-delims, ':' and '@', '%' for percent-escapes,
    # plus '/' and '?' so nested segments and the query stay part of the path.
    # Spelled out as a plain string: inside an srange "[...]" expression a
    # sequence such as ".-_" is interpreted as a character *range*.
    url_chars = pp.alphanums + "-._~!$&'()*+,;=:@%/?"

    host_pp = pp.Word(pp.alphanums + '.-').setResultsName('host')
    # Unsigned integer: a leading sign is not valid in a port number.
    port_pp = pp.pyparsing_common.integer.setResultsName('port')
    path_pp = pp.Combine('/' + pp.Optional(pp.Word(url_chars))).setResultsName('path')
    fragment_pp = pp.Combine('#' + pp.Optional(pp.Word(url_chars))).setResultsName('fragment')
    syntax_pp = 'http://' + host_pp + pp.Optional(':' + port_pp) + path_pp + pp.Optional(fragment_pp)

    try:
        # parseAll=True rejects URLs with unparsable trailing text instead of
        # silently returning a truncated path.
        result = syntax_pp.parseString(url, parseAll=True)
    except pp.ParseException:
        raise ValueError('Invalid URL') from None

    port = result.get('port', 80)
    if not 0 < port <= 65535:
        raise ValueError('Invalid URL')
    return result.get('host'), port, result.get('path')
def parse_response(raw_response):
    """Extract the status code and the body from a raw HTTP/1.1 response.

    Returns a ``(status, body)`` tuple and raises ``ValueError`` when the
    text cannot be parsed as a response.

    >>> parse_response('HTTP/1.1 200 OK\\r\\nDate: Thu, 05 Nov 2020 03:22:48 GMT\\r\\nContent-Type: text/plain\\r\\nContent-Length: 30\\r\\nConnection: close\\r\\nServer: gunicorn/19.9.0\\r\\nAccess-Control-Allow-Origin: *\\r\\nAccess-Control-Allow-Credentials: true\\r\\n\\r\\nUser-agent: *\\nDisallow: /deny\\n')
    (200, 'User-agent: *\\nDisallow: /deny')
    >>> parse_response('lorem ipsum')
    Traceback (most recent call last):
    ...
    ValueError: Invalid raw response
    """
    # Blank line that separates the header section from the body.
    header_end = '\r\n\r\n'

    status_code = pp.pyparsing_common.signed_integer.setResultsName('status')
    remainder = pp.SkipTo(pp.Regex(r'$')).setResultsName('body')
    grammar = (
        pp.LineStart()
        + 'HTTP/1.1'
        + status_code
        + pp.SkipTo(header_end)
        + remainder
        + pp.LineEnd()
    )

    try:
        parsed = grammar.parseString(raw_response)
    except pp.ParseException:
        raise ValueError('Invalid raw response')
    else:
        return parsed.get('status'), parsed.get('body')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import socket | |
class ConnectionError(OSError):
    """Error raised when a socket connection fails for any reason."""
    # NOTE(review): this name shadows the built-in ConnectionError inside
    # this module.
def get(host, port, path):
    """Open a TCP connection, send an HTTP GET request and return the raw response.

    Args:
        host: Host to connect to; also sent in the ``Host`` header.
        port: TCP port to connect to; also sent in the ``Host`` header.
        path: Request target placed on the request line.

    Returns:
        Everything the server sent before closing the connection, decoded
        to ``str`` with the default codec (UTF-8).

    Raises:
        ConnectionError: If creating, connecting, writing to or reading from
            the socket fails with an ``OSError``.

    >>> get('httpbin.org', 80, '/robots.txt') # doctest:+ELLIPSIS
    'HTTP/1.1 200 OK\\r\\nDate: ...\\r\\nContent-Type: text/plain\\r\\nContent-Length: 30\\r\\nConnection: close\\r\\nServer: gunicorn/19.9.0\\r\\nAccess-Control-Allow-Origin: *\\r\\nAccess-Control-Allow-Credentials: true\\r\\n\\r\\nUser-agent: *\\nDisallow: /deny\\n'
    >>> get('mhasbini.com', 1234, '/robots.txt')
    Traceback (most recent call last):
    ...
    tt2.ConnectionError: [Errno 113] No route to host
    """
    # Generate request message
    request_m = (
        f'GET {path} HTTP/1.1\r\n'
        f'Host: {host}:{port}\r\n'
        'Connection: close\r\n'
        '\r\n'
    )

    chunks = []
    try:
        # AF_INET -> ipv4
        # SOCK_STREAM -> TCP
        # The context manager closes the socket on every exit path, including
        # when connect/sendall/recv raise.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.connect((host, port))
            sock.sendall(request_m.encode())
            # Get data 1024 bytes at a time; an empty read means the peer
            # closed the connection (requested via "Connection: close").
            while True:
                chunk = sock.recv(1024)
                if not chunk:
                    break
                chunks.append(chunk)
    except OSError as e:
        raise ConnectionError(str(e)) from None

    # Join once at the end instead of growing a bytes object per chunk.
    return b''.join(chunks).decode()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment