Skip to content

Instantly share code, notes, and snippets.

@ivanjr0
Created July 3, 2013 15:42
Show Gist options
  • Save ivanjr0/5919547 to your computer and use it in GitHub Desktop.
Save ivanjr0/5919547 to your computer and use it in GitHub Desktop.
CurlMulti based downloader
# -*- coding: utf-8 -*-
# NOQA # Based on http://pycurl.cvs.sourceforge.net/viewvc/pycurl/pycurl/examples/retriever-multi.py?view=markup
# Ignore SIGPIPE for the whole process. The transfers below are configured
# with pycurl.NOSIGNAL, and the upstream pycurl example this module is based
# on ignores SIGPIPE in that setup.
try:
    import signal
    from signal import SIGPIPE, SIG_IGN
except ImportError:
    # The names could not be imported (e.g. SIGPIPE is not defined on this
    # platform), so there is nothing to ignore.
    pass
else:
    # Kept outside the ``try`` so that only a failed import is swallowed.
    signal.signal(SIGPIPE, SIG_IGN)
import os
from urlparse import urlparse
import pycurl
def download(root_path, conns, urls):
    """Download ``urls`` concurrently using a pycurl ``CurlMulti`` stack.

    Every URL is saved below ``root_path`` at a location that mirrors the
    path component of the URL: ``http://host/a/b/c.txt`` is written to
    ``<root_path>/a/b/c.txt``. Missing directories are created on demand.

    Args:
        root_path: Directory under which the downloaded files are stored.
        conns: Maximum number of simultaneous transfers, i.e. the size of
            the pool of ``pycurl.Curl`` handles.
        urls: Iterable of URL strings to fetch.

    Returns:
        A list with the URLs whose transfer was reported as failed by
        curl. The list is empty when every transfer succeeded (or when
        ``urls`` is empty).

    Raises:
        ValueError: If there are URLs to fetch but ``conns`` is lower than 1
            (no transfer could ever be started).
        OSError: If a target directory cannot be created for any reason
            other than it already existing as a directory.
        IOError: If a target file cannot be opened for writing.
    """
    # Function-scope imports keep this block self-contained; the loop below
    # no longer uses ``errno`` as a variable name, so the module is safe to use.
    import errno
    from collections import deque

    urls = list(urls)
    if not urls:
        # Nothing to do: avoid allocating curl handles at all.
        return []
    if conns < 1:
        # With an empty handle pool the main loop could never make progress.
        raise ValueError(
            'conns must be >= 1 to download anything, got %r' % (conns,))

    # Map every URL to its local target and make sure the directory exists.
    pending = deque()
    for url in urls:
        url_path = urlparse(url).path
        filename = url_path.rsplit('/', 1)[-1]
        # Drop the leading '/' so the URL directory is joined *below* root_path.
        path = os.path.join(root_path, os.path.dirname(url_path)[1:])
        try:
            os.makedirs(path)
        except OSError as e:
            # Only an already existing directory is acceptable; compare the
            # error code instead of the (locale dependent) error message.
            if e.errno != errno.EEXIST or not os.path.isdir(path):
                raise
        pending.append((url, os.path.join(path, filename)))

    errors = []
    multi = pycurl.CurlMulti()
    handles = []
    try:
        # Pre-allocate the pool of reusable handles; never more than needed.
        for _ in range(min(conns, len(pending))):
            c = pycurl.Curl()
            c.fp = None
            c.setopt(pycurl.FOLLOWLOCATION, 0)
            c.setopt(pycurl.FAILONERROR, 1)
            c.setopt(pycurl.NOSIGNAL, 1)
            c.setopt(pycurl.CONNECTTIMEOUT, 30)
            c.setopt(pycurl.TIMEOUT, 300)
            handles.append(c)

        num_processed = 0
        num_total = len(pending)
        freelist = handles[:]
        while num_processed < num_total:
            # Start as many transfers as there are free handles.
            while pending and freelist:
                url, filename = pending.popleft()
                c = freelist.pop()
                c.fp = open(filename, 'wb')
                c.setopt(pycurl.URL, url)
                c.setopt(pycurl.WRITEDATA, c.fp)
                multi.add_handle(c)
                # Remember what this handle is fetching for error reporting.
                c.filename = filename
                c.url = url

            # Run the curl state machine until it stops asking to be called.
            while True:
                ret, _num_handles = multi.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break

            # Collect finished transfers and recycle their handles.
            while True:
                num_q, ok_list, err_list = multi.info_read()
                finished = list(ok_list)
                for c, _err_code, _err_msg in err_list:
                    errors.append(c.url)
                    finished.append(c)
                for c in finished:
                    c.fp.close()
                    c.fp = None
                    multi.remove_handle(c)
                    freelist.append(c)
                num_processed += len(finished)
                if num_q == 0:
                    break

            # Wait (at most one second) until there is more I/O to process.
            multi.select(1.0)
    finally:
        # Always release files and curl resources, even when an exception
        # (e.g. a failing open()) interrupts the loop above.
        for c in handles:
            if c.fp is not None:
                c.fp.close()
                c.fp = None
            c.close()
        multi.close()
    return errors
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment