Skip to content

Instantly share code, notes, and snippets.

@brad-anton
Last active October 24, 2019 00:23
Show Gist options
  • Save brad-anton/ff517025a86754793b680b3c5cfc9797 to your computer and use it in GitHub Desktop.
Save brad-anton/ff517025a86754793b680b3c5cfc9797 to your computer and use it in GitHub Desktop.
Certain proxy servers require the full HTTP request to be included in the same packet as the HTTP CONNECT; however, requests seems to split these up into multiple packets. This workaround combines the headers and the CONNECT request into a single send(). Documented here: https://github.com/requests/requests/issues/4884
"""
requests_connect_with_headers.py
@brad_anton
Certain proxy servers require the full HTTP request to be included in
the same packet as the HTTP CONNECT; however, requests seems to split
these up into multiple packets. This workaround combines the headers
and the CONNECT request into a single send().
Documented here: https://github.com/requests/requests/issues/4884
"""
from requests import Session
from requests.adapters import HTTPAdapter
from requests.packages.urllib3 import proxy_from_url
from requests.packages.urllib3.connection import VerifiedHTTPSConnection
import logging
try:
    # Python 3. ``HTTPStatus`` is a class inside the ``http`` package, not a
    # submodule, so ``from http.HTTPStatus import OK`` always raises
    # ImportError. ``http.client`` re-exports the status constants, which
    # gives the same ``OK`` value without depending on ``HTTPStatus``.
    import http.client as http_client
    from http.client import OK
except ImportError:
    # Python 2
    import httplib as http_client
    from httplib import OK

# Enable http.client's debug output on the HTTPConnection class itself, so
# connections that inherit the class attribute print the traffic they handle.
http_client.HTTPConnection.debuglevel = 1
class VerifiedHTTPSConnectionWithHeaders(VerifiedHTTPSConnection):
    """HTTPS connection that sends CONNECT and its headers in one send().

    Only ``_tunnel`` is overridden; everything else is inherited from
    ``VerifiedHTTPSConnection``.
    """

    def _tunnel(self):
        """Open the proxy tunnel with a single combined CONNECT request.

        This is a simple rework of the CONNECT logic that joins the request
        line, the tunnel headers and the terminating blank line into one
        buffer and passes it to a single ``send()`` call, because sending
        them separately causes problems for some proxies.

        Raises:
            OSError: if the proxy answers the CONNECT with a status other
                than 200 OK (the connection is closed first).
            http_client.LineTooLong: if a response header line exceeds
                ``http_client._MAXLINE``.
        """
        connect_line = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
                                                       self._tunnel_port)
        request_parts = [connect_line.encode("ascii")]
        for header, value in self._tunnel_headers.items():
            header_line = "%s: %s\r\n" % (header, value)
            request_parts.append(header_line.encode("latin-1"))
        # Blank line terminating the header section.
        request_parts.append(b'\r\n')
        # One send() so the request line and headers are handed over together.
        self.send(b"".join(request_parts))

        response = self.response_class(self.sock, method=self._method)
        try:
            (_, code, message) = response._read_status()

            if code != OK:
                self.close()
                raise OSError("Tunnel connection failed: %d %s" % (code,
                                                                   message.strip()))

            # Drain the proxy's response headers.
            while True:
                line = response.fp.readline(http_client._MAXLINE + 1)
                if len(line) > http_client._MAXLINE:
                    # LineTooLong lives in http.client / httplib; the bare
                    # name was never imported and would raise NameError.
                    raise http_client.LineTooLong("header line")
                if not line:
                    # for sites which EOF without sending a trailer
                    break
                if line in (b'\r\n', b'\n', b''):
                    break

                if self.debuglevel > 0:
                    print('header:', line.decode())
        finally:
            # The response object is only needed to parse the CONNECT reply;
            # release it once parsing is done or has failed.
            response.close()
class ProxyConnectWithHeadersHTTPSAdapter(HTTPAdapter):
    """HTTP adapter whose proxy managers use our own connection class.

    Overriding the adapter is how we get ``_tunnel()`` replaced: HTTPS pools
    created by the proxy manager are made to build
    ``VerifiedHTTPSConnectionWithHeaders`` connections.
    """

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """Return the proxy manager for ``proxy`` with a patched HTTPS pool.

        Args:
            proxy: proxy URL, passed through to ``HTTPAdapter``.
            **proxy_kwargs: extra keyword arguments, passed through as well.

        Returns:
            The manager produced by ``HTTPAdapter.proxy_manager_for``, with
            its ``'https'`` pool class replaced by a subclass whose
            ``ConnectionCls`` is ``VerifiedHTTPSConnectionWithHeaders``.
        """
        manager = super(ProxyConnectWithHeadersHTTPSAdapter, self).proxy_manager_for(
            proxy, **proxy_kwargs)

        https_pool_cls = manager.pool_classes_by_scheme['https']
        # The parent may hand back a manager we already patched; skip it then.
        if https_pool_cls.ConnectionCls is not VerifiedHTTPSConnectionWithHeaders:
            # Setting ConnectionCls on the existing pool class would change it
            # for every user of that class. Instead derive a subclass and give
            # this manager its own copy of the scheme -> pool class mapping.
            pool_classes = dict(manager.pool_classes_by_scheme)
            pool_classes['https'] = type(
                'HTTPSConnectionPoolWithHeaders',
                (https_pool_cls,),
                {'ConnectionCls': VerifiedHTTPSConnectionWithHeaders},
            )
            manager.pool_classes_by_scheme = pool_classes
        return manager
def get(url, proxies, verify=True):
    """Fetch ``url`` with GET through a session using the custom adapter.

    Args:
        url: address to request.
        proxies: proxy mapping assigned to the session's ``proxies``.
        verify: value assigned to the session's ``verify`` attribute.

    Returns:
        The response object returned by the session's ``get`` call.
    """
    session = Session()
    try:
        session.verify = verify
        session.proxies = proxies
        # Send https:// URLs through the adapter that installs our connection class.
        session.mount('https://', ProxyConnectWithHeadersHTTPSAdapter())
        return session.get(url)
    finally:
        session.close()
if __name__ == '__main__':
    # Demo: debug-level logging, then one request through a placeholder proxy.
    logging.basicConfig()
    root_log = logging.getLogger()
    root_log.setLevel(logging.DEBUG)

    requests_log = logging.getLogger("requests.packages.urllib3")
    requests_log.propagate = True
    requests_log.setLevel(logging.DEBUG)

    url = 'https://www.google.com'
    proxies = {'https': 'https://some_proxy:443'}

    # Certificate verification is switched off for this demo call.
    r = get(url, proxies, verify=False)
    print(r.status_code)
@montsamu
Copy link

montsamu commented Feb 27, 2019

from http.HTTPStatus import OK

This did not work for me on Python3.6.7; I changed to:

from http import HTTPStatus
OK = HTTPStatus.OK

See:

Python 3.6.7 (default, Oct 22 2018, 11:32:17)
[GCC 8.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.

from http.HTTPStatus import OK
Traceback (most recent call last):
File "", line 1, in
ModuleNotFoundError: No module named 'http.HTTPStatus'

Or:

Python 3.6.7 (default, Oct 22 2018, 11:32:17)
[GCC 8.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.

import http.HTTPStatus
Traceback (most recent call last):
File "", line 1, in
ModuleNotFoundError: No module named 'http.HTTPStatus'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment