Created
June 28, 2018 12:51
-
-
Save huangzhuolin/82a17841eae34f602e349e3f819af0f7 to your computer and use it in GitHub Desktop.
[downloading with concurrent.futures] web downloads in three styles #python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sequential download script
import os | |
import time | |
import sys | |
import requests | |
# Twenty two-letter country codes whose flags are downloaded
# (presumably the 20 most populous countries, going by the name).
POP20_CC = (
    'CN IN US ID BR PK NG BD RU JP '
    'MX PH VN ET EG DE IR TR CD FR'
).split()

# Root URL under which each flag lives at <cc>/<cc>.gif (see get_flag).
BASE_URL = 'http://flupy.org/data/flags'

# Local directory that save_flag writes the downloaded images into.
DEST_DIR = 'downloads/'
def save_flag(img, filename):
    """Write the bytes *img* to ``DEST_DIR/filename``.

    Args:
        img: Raw image content (bytes) to store.
        filename: Name of the file to create inside ``DEST_DIR``.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    # Create the destination on first use; exist_ok=True keeps this safe
    # when several worker threads reach this line at the same time.
    os.makedirs(DEST_DIR, exist_ok=True)
    path = os.path.join(DEST_DIR, filename)
    with open(path, 'wb') as fp:
        fp.write(img)
def get_flag(cc):
    """Download the flag image for country code *cc* and return its bytes.

    The URL is ``BASE_URL/<cc>/<cc>.gif`` with *cc* lower-cased.

    Args:
        cc: Country code string, in any letter case.

    Returns:
        The body of the HTTP response as bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
    resp = requests.get(url)
    # Fail loudly on error responses instead of returning (and later
    # saving) an HTML error page as if it were the image.
    resp.raise_for_status()
    return resp.content
def show(text):
    """Print *text* followed by a single space and flush stdout at once.

    Flushing makes each item visible immediately even though no newline
    is written.
    """
    print(text, end=' ', flush=True)
def download_many(cc_list):
    """Download, announce and save each flag in *cc_list*, one at a time.

    Country codes are processed in sorted order.

    Args:
        cc_list: Sized collection of country code strings.

    Returns:
        The number of entries in *cc_list*.
    """
    ordered_codes = sorted(cc_list)
    for code in ordered_codes:
        flag_bytes = get_flag(code)
        show(code)
        target_name = code.lower() + '.gif'
        save_flag(flag_bytes, target_name)
    return len(cc_list)
def main(download_many):
    """Run *download_many* over ``POP20_CC`` and report count and duration.

    Args:
        download_many: Callable that takes the list of country codes and
            returns how many flags it downloaded.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    t0 = time.perf_counter()
    count = download_many(POP20_CC)
    elapsed = time.perf_counter() - t0
    msg = '\n{} flags downloaded in {:.2f}s'
    print(msg.format(count, elapsed))
# Script entry point: time the sequential downloader defined in this file.
if __name__ == '__main__':
    main(download_many)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Downloading with concurrent.futures
from concurrent import futures | |
from flags import save_flag, get_flag, show, main | |
# Upper bound on the number of threads download_many puts in its pool.
MAX_WORKERS = 20
def download_one(cc):
    """Fetch the flag for *cc*, announce it, save it, and return *cc*.

    Args:
        cc: Country code string.

    Returns:
        The same *cc* that was passed in.
    """
    flag_bytes = get_flag(cc)
    show(cc)
    target_name = cc.lower() + '.gif'
    save_flag(flag_bytes, target_name)
    return cc
def download_many(cc_list):
    """Download the flags for *cc_list* concurrently with a thread pool.

    The pool size is the smaller of ``MAX_WORKERS`` and the number of
    country codes; codes are submitted in sorted order.

    Args:
        cc_list: Sized collection of country code strings.

    Returns:
        The number of results produced by ``download_one`` (0 for an
        empty *cc_list*).
    """
    if not cc_list:
        # min(MAX_WORKERS, 0) would be 0, and ThreadPoolExecutor rejects a
        # non-positive max_workers with ValueError; nothing to do anyway.
        return 0
    workers = min(MAX_WORKERS, len(cc_list))
    with futures.ThreadPoolExecutor(workers) as executor:
        res = executor.map(download_one, sorted(cc_list))
    # Consuming the iterator collects every result and re-raises any
    # exception that occurred in a worker.
    return len(list(res))
# Script entry point: time the thread-pool downloader defined in this file.
if __name__ == '__main__':
    main(download_many)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replaces executor.map with executor.submit and futures.as_completed in the download_many function.
# `as_completed` yields futures as they complete.
from concurrent import futures | |
from flags_threadpool import download_one | |
def download_many(cc_list):
    """Demonstrate ``submit``/``as_completed`` on the first five codes.

    Only the first five entries of *cc_list* are used, sorted, and run on
    a pool of three worker threads. Each scheduled future and each result
    is printed as it becomes available.

    Args:
        cc_list: Sliceable sequence of country code strings.

    Returns:
        The number of results collected from the completed futures.
    """
    scheduled_template = 'Scheduled for {}: {}'
    result_template = '{} result: {!r}'
    subset = sorted(cc_list[:5])

    with futures.ThreadPoolExecutor(max_workers=3) as pool:
        pending = []
        for code in subset:
            task = pool.submit(download_one, code)
            pending.append(task)
            print(scheduled_template.format(code, task))

        outcomes = []
        # as_completed hands back futures in completion order, not in the
        # order they were submitted.
        for finished in futures.as_completed(pending):
            value = finished.result()
            print(result_template.format(finished, value))
            outcomes.append(value)

    return len(outcomes)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment