Skip to content

Instantly share code, notes, and snippets.

@allenyang79
Last active April 3, 2019 06:56
Show Gist options
  • Save allenyang79/409fd87eb04c7e12e76e8106d85bb238 to your computer and use it in GitHub Desktop.
"""
python3有concurrent.futures新功能可以做JobQueuePool的機制
"""
import concurrent.futures
import requests # This is not standard library
# Python-docs pages used as sample fetch targets for the scraper demo.
URLS = [
    'https://docs.python.org/3/library/%s.html' % page
    for page in ('ast', 'abc', 'time', 'os', 'sys', 'io', 'pdb', 'weakref')
]
def get_content(url, timeout=10.0):
    """Fetch *url* and return the response body as text (the "job" unit).

    Args:
        url: URL to fetch.
        timeout: seconds to wait for the server before giving up; without
            a timeout ``requests.get`` can block forever on a stalled host.

    Returns:
        The decoded response body (``requests.Response.text``).

    Raises:
        requests.RequestException: on connection errors or timeout.
    """
    return requests.get(url, timeout=timeout).text
def scrap():
    """Fetch every URL in URLS concurrently and print each page's length.

    Uses a thread pool (I/O-bound work releases the GIL during network
    waits) and reports results in completion order, not submission order.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        # Map each future back to its URL so results can be labelled.
        future_to_url = {executor.submit(get_content, url): url for url in URLS}
        print(future_to_url)
        for future in concurrent.futures.as_completed(future_to_url.keys()):
            url = future_to_url[future]
            try:
                data = future.result()
            # BUGFIX: original said "Execption" (NameError on any failure).
            except Exception as exc:
                print('%r generated an exception: %s' % (url, exc))
            else:
                print('%r page length is %d' % (url, len(data)))
def main():
    """Sequential baseline: fetch each URL in URLS one at a time and print
    its page length (or the exception it raised)."""
    for url in URLS:
        try:
            data = get_content(url)
        except Exception as exc:
            print('%r generated an exception: %s' % (url, exc))
            continue
        print('%r page length is %d' % (url, len(data)))
# Script entry point: runs the concurrent version (the sequential
# `main()` baseline above is left unused for comparison).
if __name__ == '__main__':
    scrap()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment