Created
November 8, 2018 04:02
-
-
Save YieldNull/cb3a881bad7862d35a57df9cc572090b to your computer and use it in GitHub Desktop.
A Multithreaded Downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging
import os
import sys
from multiprocessing import Queue
from multiprocessing.pool import ThreadPool

import requests
def download(task_file, repository, pool_size=8, timeout=30):
    """Download every URL listed in *task_file* into *repository*.

    The task file is read as text, one URL per line; blank lines are
    skipped. Each URL is fetched by a worker thread and stored under
    ``os.path.basename(url)`` inside *repository*. A failed download is
    logged as a warning and does not stop the remaining downloads.

    Args:
        task_file: Path of the text file listing the URLs.
        repository: Directory the downloaded files are written into.
        pool_size: Number of worker threads, and also the maximum number
            of downloads submitted but not yet finished.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            server cannot block a worker forever.
    """
    logger = logging.getLogger('downloader')
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    with open(task_file, 'r') as f:
        tasks = [line.strip() for line in f if line.strip()]

    pool = ThreadPool(pool_size)
    # Every finished download (successful or not) puts one token here; the
    # submit loop below consumes a token before handing out more work.
    feedback = Queue()

    def handle(url):
        """Fetch one URL, write it to disk, and always release a token."""
        filepath = os.path.join(repository, os.path.basename(url))
        try:
            # Fetch before opening the destination so that a failed request
            # neither creates an empty file nor truncates an existing one.
            response = requests.get(url, timeout=timeout)
            # Treat HTTP error statuses as failures instead of saving the
            # error page as if it were the requested file.
            response.raise_for_status()
            with open(filepath, 'wb') as out:
                out.write(response.content)
            logger.info('DONE %s', url)
        except Exception as e:
            # Worker boundary: log and carry on so that one bad URL does not
            # abort the rest of the batch.
            logger.warning('FAILED %s %s', url, repr(e))
        finally:
            # Must run on every path, otherwise the submit loop would wait
            # forever for a token that never arrives.
            feedback.put(url)

    for index, url in enumerate(tasks):
        if index >= pool_size:
            # Block until some earlier download has finished.
            feedback.get()
        pool.apply_async(handle, args=(url,))

    pool.close()
    pool.join()
    logger.info('DONE ALL')
if __name__ == '__main__':
    import sys

    # Two positional arguments are required: the task file and the target
    # directory. Exit with a usage message instead of a bare IndexError.
    if len(sys.argv) < 3:
        sys.exit('usage: %s TASK_FILE REPOSITORY' % sys.argv[0])
    download(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment