Skip to content

Instantly share code, notes, and snippets.

@YieldNull
Created November 8, 2018 04:02
Show Gist options
  • Save YieldNull/cb3a881bad7862d35a57df9cc572090b to your computer and use it in GitHub Desktop.
Save YieldNull/cb3a881bad7862d35a57df9cc572090b to your computer and use it in GitHub Desktop.
A Multithreaded Downloader
import logging
import os
import sys
from multiprocessing import Queue
from multiprocessing.pool import ThreadPool

import requests
def download(task_file, repository, pool_size=8, timeout=30):
    """Download every URL listed in ``task_file`` into ``repository``.

    The task file is read as text with one URL per line; blank lines are
    skipped.  Each URL is fetched with ``requests`` on a thread pool and
    saved under ``os.path.basename(url)`` inside ``repository`` (the
    directory is not created here).  A failed download is logged as a
    warning and does not stop the remaining ones.

    Args:
        task_file: Path of the text file that lists the URLs.
        repository: Directory the downloaded files are written to.
        pool_size: Number of worker threads, i.e. the maximum number of
            simultaneous downloads.
        timeout: Value passed to ``requests.get`` as ``timeout`` (seconds);
            ``None`` waits indefinitely.

    Returns:
        None.  Progress and failures are reported through the
        ``'downloader'`` logger.
    """
    logger = logging.getLogger('downloader')
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    with open(task_file, 'r') as f:
        stripped = (line.strip() for line in f)
        tasks = [url for url in stripped if url]

    def handle(url):
        filename = os.path.basename(url)
        filepath = os.path.join(repository, filename)
        try:
            # Fetch before opening the target file so that a failed request
            # does not leave an empty file behind.
            response = requests.get(url, timeout=timeout)
            # Treat HTTP 4xx/5xx as failures instead of saving the error
            # page as if it were the requested file.
            response.raise_for_status()
            with open(filepath, 'wb') as out:
                out.write(response.content)
        except Exception as e:
            # Worker boundary: log and carry on so that one bad URL does
            # not abort the whole batch.
            logger.warning('FAILED %s %s', url, repr(e))
        else:
            logger.info('DONE %s', url)

    # The pool itself caps concurrency at ``pool_size`` worker threads, so
    # all tasks can be queued up front without extra throttling.
    pool = ThreadPool(pool_size)
    for url in tasks:
        pool.apply_async(handle, args=(url,))
    pool.close()
    pool.join()
    logger.info('DONE ALL')
if __name__ == '__main__':
    # Command-line entry point: the first argument is the file listing the
    # URLs, the second is the directory the downloads are written to.
    if len(sys.argv) < 3:
        # Report missing arguments with a usage message (printed to stderr,
        # non-zero exit status) instead of an IndexError traceback.
        sys.exit('usage: %s TASK_FILE REPOSITORY' % sys.argv[0])
    download(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment