Better http for py2
from functools import partial
from multiprocessing.dummy import Pool as ThreadPool

import tqdm
import urllib3

# Prefer the fastest available JSON decoder, falling back to the stdlib.
try:
    from ujson import loads
except ImportError:
    try:
        from simplejson import loads
    except ImportError:
        from json import loads
def http(urls, headers=None, timeout=10, method='GET', cb=None, workers=10, chunk=None, rtype='content'):
    """Fetch a batch of URLs concurrently, yielding each result as it finishes."""
    if isinstance(urls, str):
        # Accept a single string of URLs separated by whitespace or commas.
        urls = urls.replace(',', ' ').split()

    if headers is None:
        headers = {}

    if chunk is None:
        # Chunksize heuristic borrowed from cpython's multiprocessing.Pool.
        chunk, extra = divmod(len(urls), workers * 4)
        if extra:
            chunk += 1
        chunk = max(chunk, 1)  # imap_unordered requires a chunksize of at least 1

    method = method.upper()

    def _http_requests_urllib3(url, session, headers, timeout=10, method='GET', rtype='json', cb=None):
        x = session.request(method, url, headers=headers, timeout=timeout).data
        if rtype == 'text':
            x = x.decode('utf-8', 'ignore')
        elif rtype == 'json':
            x = loads(x.decode('utf-8', 'ignore'))
        if cb:
            return cb(x)
        return x

    session = urllib3.PoolManager()  # default pool size is 10
    part = partial(_http_requests_urllib3, session=session, headers=headers,
                   timeout=timeout, method=method, rtype=rtype, cb=cb)

    if len(urls) == 1:
        # A single URL does not need the pool.
        yield part(urls[0])
        return

    pool = ThreadPool(workers)
    try:
        # imap_unordered yields results in completion order, not input order.
        for work in tqdm.tqdm(pool.imap_unordered(part, urls, chunk)):
            yield work
    except Exception as e:
        print(e)
    finally:
        pool.close()
        pool.join()
def cb(result):
    print("I'm a callback")
    return result


for result in http(['url1', 'url2'], cb=cb, headers={}, rtype='json'):
    do_something(result)
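To make the borrowed chunksize heuristic concrete, here is the arithmetic for a batch of 100 URLs with the default workers=10 (the numbers are purely illustrative):

# divmod(100, 10 * 4) -> (2, 20); the nonzero remainder bumps chunk to 3,
# so each idle worker pulls three URLs at a time from the shared queue.
chunk, extra = divmod(100, 10 * 4)  # chunk == 2, extra == 20
if extra:
    chunk += 1                      # chunk == 3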
Basically 10 workers requesting chunks from a list of URLs, then yielding the responses as they finish.
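A minimal usage sketch of that behavior; the endpoints below are hypothetical placeholders, not real URLs:

urls = [
    'https://example.com/api/a',
    'https://example.com/api/b',
    'https://example.com/api/c',
]

# rtype='text' decodes each body to str; because the pool uses
# imap_unordered, responses arrive in completion order.
for body in http(urls, rtype='text', workers=3):
    print(body[:80])  # first 80 characters of each response body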