Skip to content

Instantly share code, notes, and snippets.

@ivanjr0
Created July 3, 2013 15:41
Show Gist options
  • Save ivanjr0/5919530 to your computer and use it in GitHub Desktop.
Save ivanjr0/5919530 to your computer and use it in GitHub Desktop.
Threaded downloader
# -*- coding: utf-8 -*-
import errno
import os
from Queue import Queue
from threading import Thread, Lock
from urlparse import urlparse

import requests
# Chunk size, in bytes, passed to ``response.iter_content()`` when a download
# is written to disk. (The identifier's spelling is kept because other code
# in this module refers to it by this name.)
CHUNCK_SIZE = 8192
class Downloader(object):
    """Queue worker that downloads ``(url, filepath)`` jobs in an endless loop.

    Instances are callable, so one can be handed directly to ``Thread`` as its
    ``target``. The loop never returns; it is meant to run in a daemon thread.
    """

    class Errors(object):
        """Lock-protected list of URLs whose download failed."""

        def __init__(self):
            self.urls = []
            self.lock = Lock()

        def append(self, url):
            """Record *url* as failed while holding the lock."""
            with self.lock:
                self.urls.append(url)

    def __init__(self, queue, errors):
        """Keep references to the job queue and the failure collector.

        queue  -- provides ``get()`` / ``task_done()`` and yields
                  ``(url, filepath)`` tuples.
        errors -- provides ``append(url)`` (see ``Errors``).
        """
        self._queue = queue
        self._errors = errors

    @staticmethod
    def _download(url, filepath):
        """Fetch *url* and write the response body to *filepath*.

        Raises ``requests.RequestException`` when the request fails or the
        server answers with an error status, and ``IOError``/``OSError`` when
        the destination file cannot be opened or written.
        """
        # stream=True keeps the body out of memory until it is iterated below.
        response = requests.get(url, stream=True)
        try:
            # Check the status before touching the filesystem so a failed
            # request does not leave an empty file behind.
            response.raise_for_status()
            with open(filepath, 'wb') as file_:
                for chunk in response.iter_content(CHUNCK_SIZE):
                    # Skip empty chunks instead of stopping, so an empty
                    # chunk in the middle of the stream cannot truncate
                    # the file.
                    if chunk:
                        file_.write(chunk)
        finally:
            response.close()

    def __call__(self):
        """Process jobs from the queue forever, recording failed URLs."""
        q = self._queue
        err = self._errors
        while True:
            # Fetch the job outside the ``try`` so ``task_done()`` is only
            # ever paired with a job that was actually taken from the queue.
            url, filepath = q.get()
            try:
                self._download(url, filepath)
            except (requests.RequestException, EnvironmentError):
                # Filesystem errors are recorded as well; otherwise they
                # would kill this worker silently and the URL would never
                # be reported as failed.
                err.append(url)
            finally:
                q.task_done()
def download(root_path, conns, urls):
    """Download every URL in *urls* below *root_path* using worker threads.

    The directory part of each URL's path is recreated under *root_path* and
    the last path segment is used as the file name.

    root_path -- directory under which the downloaded files are stored.
    conns     -- maximum number of worker threads (values below 1 are
                 treated as 1).
    urls      -- iterable of URL strings.

    Returns the list of URLs that the workers recorded as failed.
    Raises ``OSError`` if a destination directory cannot be created for a
    reason other than already existing.
    """
    q = Queue()
    errors = Downloader.Errors()
    queued = 0  # counted here so *urls* may be any iterable, not just a sequence
    for url in urls:
        url_path = urlparse(url).path
        filename = url_path.rsplit('/', 1)[-1]
        # Strip *all* leading slashes: an absolute component would make
        # os.path.join() discard root_path.
        # NOTE(review): '..' segments in the URL path are not filtered and
        # can still point outside root_path -- confirm URLs are trusted.
        path = os.path.join(root_path, os.path.dirname(url_path).lstrip('/'))
        try:
            os.makedirs(path)
        except OSError as e:
            # OK if the directory already exists. Compare the errno rather
            # than the message text, which depends on platform and locale.
            if e.errno != errno.EEXIST:
                raise
        filepath = os.path.join(path, filename)
        q.put((url, filepath))
        queued += 1
    # Never start more workers than *conns* or than there are jobs, but keep
    # at least one when there is work so that q.join() can complete.
    for _ in range(min(max(conns, 1), queued)):
        t = Thread(target=Downloader(q, errors))
        t.daemon = True  # workers loop forever; do not block interpreter exit
        t.start()
    q.join()
    return errors.urls
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment