Created
June 27, 2012 12:17
-
-
Save faruken/3003699 to your computer and use it in GitHub Desktop.
A quick hack for downloading imgur albums with gevent!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
from __future__ import with_statement | |
import sys | |
import os | |
import urllib2 | |
from urlparse import urlparse | |
import random | |
import re | |
import gevent | |
from gevent import monkey | |
monkey.patch_all() | |
def get(url):
    """Open *url* and return the file-like response object.

    A fresh ``urllib2`` opener is built, given a ``User-Agent`` header and
    installed as the process-wide default opener before the request is sent.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The response object returned by the opener. The caller is
        responsible for reading and closing it.

    Raises:
        SystemExit: with status -1 when opening the URL fails with
            ``urllib2.HTTPError`` or ``urllib2.URLError``.
    """
    opener = urllib2.build_opener()
    # The attribute the opener actually reads is ``addheaders``; the former
    # ``add_headers`` spelling was silently ignored.
    opener.addheaders = [('User-Agent', 'imgur-album-downloader (urllib2)')]
    urllib2.install_opener(opener)
    request = urllib2.Request(url)
    try:
        # Network errors are raised by open(), not by Request(), so this is
        # the call that has to sit inside the try block.
        return opener.open(request)
    except (urllib2.HTTPError, urllib2.URLError):
        # sys.exc_info() keeps this handler valid on every Python version
        # that can run this file (no ``except ... as`` / ``except ..., e``).
        error = sys.exc_info()[1]
        sys.stderr.write('Failed to fetch %s: %s\n' % (url, error))
        sys.exit(-1)
def is_url(url):
    """Return True when *url* is hosted on imgur.com or one of its subdomains.

    The host is compared exactly (or as a ``.imgur.com`` suffix) rather than
    by substring, so look-alike hosts such as ``notimgur.com`` or
    ``imgur.com.example.org`` are rejected.

    Args:
        url: URL string to inspect. It needs a scheme (``http://...``) for
            the host to be recognised.

    Returns:
        bool: True for ``imgur.com`` and ``*.imgur.com`` hosts, else False.
    """
    # ``hostname`` is lower-cased and stripped of port/userinfo; it is None
    # when the URL carries no network location.
    host = urlparse(url).hostname or ''
    return host == 'imgur.com' or host.endswith('.imgur.com')
def fetch(url):
    """Download the ``noscript`` page of the album referenced by *url*.

    The album key is taken from the second path segment of *url*
    (``/a/<key>``) and used to build
    ``https://imgur.com/a/<key>/noscript``.

    Args:
        url: Album URL whose path has the key as its second segment.

    Returns:
        tuple: ``(page_body, key)`` where ``page_body`` is whatever the
        response's ``read()`` returns and ``key`` is the album key string.

    Raises:
        ValueError: if the URL path has no (non-empty) second segment.
    """
    segments = urlparse(url).path.split('/')
    # A path such as "/a/<key>" splits into ['', 'a', '<key>', ...].
    if len(segments) < 3 or not segments[2]:
        raise ValueError('No album key found in URL: %r' % (url,))
    key = segments[2]
    response = get('https://imgur.com/a/%s/noscript' % key)
    try:
        body = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()
    return body, key
def get_or_create_folder(key, folder=None):
    """Make sure the download directory exists and return its name.

    Args:
        key: Album key, used as the directory name when *folder* is not
            given.
        folder: Optional directory name overriding *key*. ``None`` or an
            empty string falls back to *key*.

    Returns:
        str: The directory name that was created or already existed.

    Raises:
        OSError: if the directory cannot be created, including when the
            path already exists but is not a directory.
    """
    import errno  # local import: not among this file's module-level imports

    foldername = folder if folder else key
    try:
        # Create first and inspect the failure afterwards; checking
        # os.path.exists() beforehand leaves a window between check and use.
        os.makedirs(foldername)
    except OSError:
        error = sys.exc_info()[1]
        # An already-existing directory is fine; anything else is not.
        if error.errno != errno.EEXIST or not os.path.isdir(foldername):
            raise
    return foldername
def fetch_images(foldername, images):
    """Download one image and store it inside *foldername*.

    Args:
        foldername: Existing directory the image is written into.
        images: Sequence whose item 0 is the image URL and whose item 1 is
            the file name to save it under.

    The image is downloaded completely before the destination file is
    opened, so a failed download no longer leaves an empty file behind.
    """
    # Sleeps for either 0 or 0.0001 seconds before starting the download.
    gevent.sleep(random.randint(0, 1) * 0.0001)
    source_url, filename = images[0], images[1]
    response = get(source_url)
    try:
        payload = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()
    with open(os.path.join(foldername, filename), 'wb') as img:
        img.write(payload)
    # Parenthesised single argument: prints the same text under the print
    # statement and the print function.
    print('Done:\t%s' % source_url)
def save(url, folder=None):
    """Collect the image links of an album and prepare its download folder.

    Args:
        url: Album URL, handed to ``fetch``.
        folder: Optional directory name; when ``None`` the album key is
            used instead (see ``get_or_create_folder``).

    Returns:
        tuple: ``(foldername, images)``. ``images`` is the ``findall``
        result of the image pattern, i.e. a list of
        ``(image_url, file_name, extension)`` tuples.
    """
    image_pattern = re.compile(r'<img src="(http\:\/\/i\.imgur\.com\/([a-zA-Z0-9]{5}\.(jpg|png|gif)))"')
    page, album_key = fetch(url)
    matches = image_pattern.findall(page)
    # The folder is created only after the page has been fetched and scanned.
    return get_or_create_folder(album_key, folder), matches
if __name__ == '__main__':
    # Command line: <script> <album-url> [folder]
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit('Usage: %s <imgur-album-url> [folder]' % sys.argv[0])
    url = sys.argv[1]
    # The target folder is optional; save() falls back to the album key.
    folder = sys.argv[2] if len(sys.argv) > 2 else None
    foldername, images = save(url, folder=folder)
    # One greenlet per image; wait until every download has finished.
    threads = [gevent.spawn(fetch_images, foldername, image) for image in images]
    gevent.joinall(threads)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment