Created
June 11, 2014 16:14
-
-
Save wickman/8a3eb57ee35e84e6bc85 to your computer and use it in GitHub Desktop.
requests patch to twitter.common.http
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/python/twitter/common/python/http/http.py b/src/python/twitter/common/python/http/http.py
index 51d0543..db5ab05 100644 | |
--- a/src/python/twitter/common/python/http/http.py | |
+++ b/src/python/twitter/common/python/http/http.py | |
@@ -6,7 +6,7 @@ import struct | |
import time | |
from ..common import safe_delete, safe_mkdir, safe_mkdtemp | |
-from ..compatibility import PY2, PY3 | |
+from ..compatibility import PY2, PY3, StringIO | |
from .tracer import TRACER | |
if PY3: | |
@@ -25,6 +25,13 @@ else: | |
import urlparse | |
+try: | |
+ import requests | |
+ HAS_REQUESTS = True | |
+except ImportError: | |
+ HAS_REQUESTS = False | |
+ | |
+ | |
class Timeout(Exception): | |
pass | |
@@ -59,6 +66,34 @@ def deadline(fn, *args, **kw): | |
raise Timeout | |
+# TODO(wickman) Extract md5/sha1 fragments and verify. | |
+# TODO(wickman) Establish a chain of trust rooted at pypi and anything explicitly called | |
+# out in find-links a la pip. | |
+ | |
+ | |
def urllib_open(url, verify=False):
  """Open ``url`` via urllib, normalizing all failures to ``FetchError``.

  :param url: The URL to fetch.
  :param verify: If True, demand SSL certificate verification.  urllib's
    ``urlopen`` cannot verify certs, so requesting it always raises.
  :raises FetchError: If verification is requested, or on any
    ``URLError``/``HTTPException`` raised while opening the url.
  """
  if verify:
    raise FetchError('urlopen does not support SSL cert verification.')
  try:
    # Bug fix: the original called ``urlopen(url, **kw)`` but ``kw`` was never
    # defined in this signature (a leftover from an earlier ``**kw`` form),
    # so the success path raised NameError.  urlopen is called with url alone.
    return urllib_request.urlopen(url)
  except (urllib_error.URLError, HTTPException) as exc:
    raise FetchError(exc)
+ | |
+ | |
def requests_open(url, verify=False):
  """Fetch ``url`` with the requests library, mimicking urlopen's interface.

  :param url: The URL to fetch.
  :param verify: Passed straight through to ``requests.get`` to control SSL
    certificate verification.
  :returns: An ``addinfourl`` file-like object wrapping the response body,
    headers and status code, so callers written against urlopen still work.
  :raises FetchError: On any ``requests.exceptions.RequestException``.
  """
  try:
    response = requests.get(url, verify=verify)
  except requests.exceptions.RequestException as exc:
    raise FetchError(exc)
  else:
    # NOTE(review): ``StringIO`` here is the ..compatibility shim; assumes it
    # accepts the bytes in ``response.content`` on PY3 -- confirm its mapping.
    body = StringIO(response.content)
    return addinfourl(body, response.headers, url, code=response.status_code)
+ | |
+ | |
+if HAS_REQUESTS: | |
+ global_opener = requests_open | |
+else: | |
+ global_opener = urllib_open | |
+ | |
+ | |
class Web(object): | |
NS_TIMEOUT_SECS = 5.0 | |
CONN_TIMEOUT = 1.0 | |
@@ -112,7 +147,7 @@ class Web(object): | |
return 'file://' + os.path.realpath(url) | |
return url | |
- def open(self, url, conn_timeout=None, **kw): | |
+ def open(self, url, conn_timeout=None, verify=True): | |
""" | |
Wrapper in front of urlopen that more gracefully handles odd network environments. | |
""" | |
@@ -120,10 +155,7 @@ class Web(object): | |
with TRACER.timed('Fetching %s' % url, V=1): | |
if not self.reachable(url, conn_timeout=conn_timeout): | |
raise FetchError('Could not reach %s within deadline.' % url) | |
- try: | |
- return urllib_request.urlopen(url, **kw) | |
- except (urllib_error.URLError, HTTPException) as exc: | |
- raise FetchError(exc) | |
+ return global_opener(url, verify=verify) | |
class CachedWeb(object): | |
@@ -168,16 +200,16 @@ class CachedWeb(object): | |
return False | |
return age > ttl | |
- def really_open(self, url, conn_timeout=None): | |
+ def really_open(self, url, conn_timeout=None, **kw): | |
try: | |
- return self._opener.open(url, conn_timeout=conn_timeout) | |
+ return self._opener.open(url, conn_timeout=conn_timeout, **kw) | |
except urllib_error.HTTPError as fp: | |
# HTTPError is a valid addinfourl -- use this instead of raising | |
return fp | |
- def encode_url(self, url, conn_timeout=None): | |
+ def encode_url(self, url, **kw): | |
target, target_tmp, headers, headers_tmp = self.translate_all(url) | |
- with contextlib.closing(self.really_open(url, conn_timeout=conn_timeout)) as http_fp: | |
+ with contextlib.closing(self.really_open(url, **kw)) as http_fp: | |
# File urls won't have a response code, they'll either open or raise. | |
if http_fp.getcode() and http_fp.getcode() != 200: | |
raise urllib_error.URLError('Non-200 response code from %s' % url) | |
@@ -202,21 +234,21 @@ class CachedWeb(object): | |
for path in self.translate_all(url): | |
safe_delete(path) | |
- def cache(self, url, conn_timeout=None): | |
+ def cache(self, url, **kw): | |
"""cache the contents of a url.""" | |
try: | |
- self.encode_url(url, conn_timeout=conn_timeout) | |
+ self.encode_url(url, **kw) | |
except urllib_error.URLError: | |
self.clear_url(url) | |
raise | |
- def open(self, url, ttl=None, conn_timeout=None): | |
+ def open(self, url, ttl=None, **kw): | |
"""Return a file-like object with the content of the url.""" | |
expired = self.expired(url, ttl=ttl) | |
with TRACER.timed('Opening %s' % ('(cached)' if not expired else '(uncached)'), V=1): | |
if expired: | |
try: | |
- self.cache(url, conn_timeout=conn_timeout) | |
+ self.cache(url, **kw) | |
except (urllib_error.URLError, HTTPException) as exc: | |
if not self._failsoft or url not in self: | |
raise FetchError(exc) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment