# coding: utf-8
"""Download every picture of a gallery page into a directory named after it.

The page is expected to contain a ``<title>`` element, a ``pic_base = '...'``
assignment holding the base URL, and a ``picTree = [...]`` list of picture
names.  Pictures are fetched concurrently through a gevent pool and saved as
``<index><extension>``; files that already exist are skipped, so an
interrupted run can simply be restarted.

Usage: script.py [url]
"""
# gevent must patch the standard library before requests (and therefore
# ssl/socket) is imported, otherwise the already-imported modules keep their
# blocking implementations.
from gevent import monkey
monkey.patch_all(dns=False)

import os
import posixpath
import re
import sys

import requests
from gevent import pool, Timeout

p = pool.Pool(20)      # at most 20 concurrent picture downloads
TIMEOUT = 10           # seconds allowed for one complete HTTP request
MAX_RETRIES = 5        # attempts per URL before giving up

title_p = re.compile(r'title>([^<]*)</title')
base_p = re.compile(r"pic_base\s=\s'([^']*)'")
pics_p = re.compile(r"picTree\s=\s\[([^\]]*)\]")

# Progress counters shared between download() and dl().
cnt = 0    # pictures processed so far
sam = 0    # total number of pictures in the current gallery


def _fetch(url):
    """Return a successful response for *url*, trying up to MAX_RETRIES times.

    Each attempt, including reading the body, is limited to TIMEOUT seconds.
    Raises the last ``requests.RequestException``, or ``IOError('timeout')``
    if the final attempt timed out.
    """
    last_error = None
    for _attempt in range(MAX_RETRIES):
        try:
            with Timeout(TIMEOUT):
                response = requests.get(url)
                # Treat 4xx/5xx as failures instead of saving an error page.
                response.raise_for_status()
                # Read the body now so the download is covered by the timeout.
                response.content
            return response
        except Timeout:
            last_error = IOError('timeout')
            print('timeout')
        except requests.RequestException as e:
            last_error = e
            print(str(e))
    raise last_error


def _first_match(pattern, html, what):
    """Return the first capture of *pattern* in *html* or raise ValueError."""
    match = pattern.search(html)
    if match is None:
        raise ValueError('could not find %s in page' % what)
    return match.group(1)


def _safe_dirname(name):
    """Turn a page title into a single, safe directory name.

    The title comes from a remote page, so path separators are replaced and
    names that would refer to the current or parent directory are rejected.
    """
    name = re.sub(r'[\\/]', '_', name.strip())
    if name in ('', '.', '..'):
        raise ValueError('page title %r is not usable as a directory' % name)
    return name


def download(url):
    """Download all pictures referenced by the gallery page at *url*.

    Creates a directory named after the page title (if missing) in the
    current working directory and blocks until every picture has been
    processed.

    Raises:
        ValueError: the page lacks a title, ``pic_base`` or ``picTree``.
        IOError / requests.RequestException: the page could not be fetched.
    """
    global cnt, sam
    r = _fetch(url)
    r.encoding = 'utf8'
    html = r.text

    title = _safe_dirname(_first_match(title_p, html, 'title'))
    if not os.path.isdir(title):
        os.mkdir(title)

    base = _first_match(base_p, html, 'pic_base')
    if not base.endswith('/'):
        base += '/'

    # Entries look like "name.jpg"; tolerate spaces after commas, either
    # quote style, and an empty list.
    entries = _first_match(pics_p, html, 'picTree').split(',')
    pics = [entry.strip().strip('"\'') for entry in entries]
    pics = [pic for pic in pics if pic]

    cnt, sam = 0, len(pics)
    print('%s / %s' % (cnt, sam))
    for i, pic in enumerate(pics):
        p.spawn(dl, i, base + pic, title)
    p.join()


def dl(i, pic_url, dest_dir='.'):
    """Download one picture to ``<dest_dir>/<i><ext>`` and report progress.

    Args:
        i: index of the picture, used as the file name.
        pic_url: absolute URL of the picture.
        dest_dir: directory to write into (defaults to the current one).

    An existing file is left untouched.  A picture that still fails after
    MAX_RETRIES attempts is reported and skipped so the rest of the gallery
    can finish.
    """
    global cnt
    # Take the extension from the URL path only, ignoring any query string.
    ext = posixpath.splitext(pic_url.split('?', 1)[0])[1]
    filename = os.path.join(dest_dir, str(i) + ext)
    if not os.path.exists(filename):
        try:
            img = _fetch(pic_url).content
        except IOError as e:  # includes requests.RequestException
            print('failed: %s (%s)' % (pic_url, e))
        else:
            # Write to a temporary name first so an interrupted write never
            # leaves a truncated file that later runs would skip.
            partial = filename + '.part'
            with open(partial, 'wb') as f:
                f.write(img)
            os.rename(partial, filename)
    cnt += 1
    print('%s / %s' % (cnt, sam))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage:')
        print('%s [url]' % __file__)
        sys.exit(1)
    download(sys.argv[1])