Last active
October 24, 2019 00:23
-
-
Save brad-anton/ff517025a86754793b680b3c5cfc9797 to your computer and use it in GitHub Desktop.
Certain proxy servers require the full HTTP request to be included in the same packet as the HTTP CONNECT; however, requests seems to split these up into multiple packets. This workaround combines the headers and the CONNECT into a single send(). Documented here: https://github.com/requests/requests/issues/4884
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
requests_connect_with_headers.py

@brad_anton

Certain proxy servers require the full HTTP request to be included in
the same packet as the HTTP CONNECT; however, requests seems to split
these up into multiple packets. This workaround combines the headers
and the CONNECT into a single send().

Documented here: https://github.com/requests/requests/issues/4884
"""
from requests import Session | |
from requests.adapters import HTTPAdapter | |
from requests.packages.urllib3 import proxy_from_url | |
from requests.packages.urllib3.connection import VerifiedHTTPSConnection | |
import logging | |
try:
    # Python 3
    import http.client as http_client
except ImportError:
    # Python 2
    import httplib as http_client

# Read the 200 status from whichever module was imported. The previous
# ``from http.HTTPStatus import OK`` always failed on Python 3 because
# HTTPStatus is a class, not a module, and the resulting ImportError sent
# execution into the Python 2 branch, which then failed as well.
OK = http_client.OK

# Class-level switch: every HTTPConnection created afterwards inherits it,
# which turns on the 'header:' style debug output used further below.
http_client.HTTPConnection.debuglevel = 1
class VerifiedHTTPSConnectionWithHeaders(VerifiedHTTPSConnection):
    """HTTPS connection whose CONNECT request and headers share one send().

    Only ``_tunnel()`` is overridden; everything else is inherited from
    ``VerifiedHTTPSConnection``.
    """

    def _tunnel(self):
        """Send CONNECT plus all tunnel headers in a single ``send()`` call.

        This is a simple rework of the CONNECT handshake that combines the
        tunnel headers with the CONNECT request line, because sending them
        separately causes problems for some proxies.

        Raises:
            OSError: if the proxy answers the CONNECT with a status other
                than OK. The connection is closed first.
            http_client.LineTooLong: if a response header line is longer
                than ``http_client._MAXLINE``.
        """
        # Collect the request line, each header, and the terminating blank
        # line, then join once so the whole request goes out in one send().
        request_parts = [
            ("CONNECT %s:%d HTTP/1.0\r\n"
             % (self._tunnel_host, self._tunnel_port)).encode("ascii")
        ]
        request_parts.extend(
            ("%s: %s\r\n" % (header, value)).encode("latin-1")
            for header, value in self._tunnel_headers.items()
        )
        request_parts.append(b"\r\n")
        self.send(b"".join(request_parts))

        response = self.response_class(self.sock, method=self._method)
        (version, code, message) = response._read_status()

        if code != OK:
            self.close()
            raise OSError("Tunnel connection failed: %d %s" % (code,
                                                               message.strip()))

        # Drain the proxy's response headers up to the blank line (or EOF).
        while True:
            line = response.fp.readline(http_client._MAXLINE + 1)
            if len(line) > http_client._MAXLINE:
                # LineTooLong was referenced as a bare, never-imported name
                # before, so this path raised NameError instead.
                raise http_client.LineTooLong("header line")
            if not line:
                # for sites which EOF without sending a trailer
                break
            if line in (b'\r\n', b'\n', b''):
                break

            if self.debuglevel > 0:
                print('header:', line.decode())
class ProxyConnectWithHeadersHTTPSAdapter(HTTPAdapter):
    """Overriding HTTP Adapter so that we can use our own Connection, since
    we need to get at _tunnel()
    """

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """Return the proxy manager for ``proxy`` with a patched HTTPS pool.

        The manager is built by ``HTTPAdapter.proxy_manager_for`` and then
        given an HTTPS pool class whose ``ConnectionCls`` is
        ``VerifiedHTTPSConnectionWithHeaders``.

        Args:
            proxy: proxy URL, passed through to the parent implementation.
            **proxy_kwargs: extra keyword arguments, passed through as well.

        Returns:
            The proxy manager returned by the parent implementation.
        """
        manager = super(ProxyConnectWithHeadersHTTPSAdapter, self).proxy_manager_for(
            proxy, **proxy_kwargs)

        https_pool_cls = manager.pool_classes_by_scheme['https']
        # The parent may hand back a manager that was already patched on an
        # earlier call; do not stack another subclass on top of it.
        if https_pool_cls.ConnectionCls is not VerifiedHTTPSConnectionWithHeaders:
            # Subclass the pool and give this manager its own copy of the
            # scheme map. Assigning ConnectionCls on the pool class in place
            # would change it for every user of that class, not only for
            # connections made through this adapter.
            patched_pool_cls = type(
                'HTTPSConnectionPoolWithHeaders',
                (https_pool_cls,),
                {'ConnectionCls': VerifiedHTTPSConnectionWithHeaders},
            )
            scheme_map = dict(manager.pool_classes_by_scheme)
            scheme_map['https'] = patched_pool_cls
            manager.pool_classes_by_scheme = scheme_map
        return manager
def get(url, proxies, verify=True):
    """Fetch ``url`` with a GET through a one-off session.

    The session has ``ProxyConnectWithHeadersHTTPSAdapter`` mounted for
    ``https://`` URLs and is closed when the call returns.

    Args:
        url: address to request.
        proxies: value assigned to the session's ``proxies`` attribute.
        verify: value assigned to the session's ``verify`` attribute.

    Returns:
        Whatever ``Session.get`` returns for ``url``.
    """
    with Session() as session:
        session.verify = verify
        session.proxies = proxies
        session.mount('https://', ProxyConnectWithHeadersHTTPSAdapter())
        return session.get(url)
if __name__ == '__main__':
    # Demo run: one request through a placeholder proxy with debug logging.
    url = 'https://www.google.com'
    proxies = { 'https': 'https://some_proxy:443' }

    # Root logger at DEBUG so everything is shown.
    logging.basicConfig()
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)

    # Same level for the urllib3 logger bundled under requests, with its
    # records propagated up to the root handler.
    urllib3_logger = logging.getLogger("requests.packages.urllib3")
    urllib3_logger.propagate = True
    urllib3_logger.setLevel(logging.DEBUG)

    # verify=False is handed to get(), which stores it on the session.
    r = get(url, proxies, verify=False)
    print(r.status_code)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This did not work for me on Python 3.6.7; I changed to:
See:
Python 3.6.7 (default, Oct 22 2018, 11:32:17)
[GCC 8.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
Or:
Python 3.6.7 (default, Oct 22 2018, 11:32:17)
[GCC 8.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.