Skip to content

Instantly share code, notes, and snippets.

@faruken
Created June 27, 2012 12:17
Show Gist options
  • Save faruken/3003699 to your computer and use it in GitHub Desktop.
Save faruken/3003699 to your computer and use it in GitHub Desktop.
A quick hack for downloading imgur albums with gevent!
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import with_statement
import sys
import os
import urllib2
from urlparse import urlparse
import random
import re
import gevent
from gevent import monkey
monkey.patch_all()
def get(url):
    """Open *url* and return the urllib2 response object.

    A fresh opener is built on every call, given a ``User-agent`` header and
    installed as urllib2's global opener before the request is sent.

    The caller is responsible for reading and closing the returned response.
    ``urllib2.HTTPError`` / ``urllib2.URLError`` raised while opening the URL
    propagate to the caller.
    """
    opener = urllib2.build_opener()
    # OpenerDirector takes its default headers from ``addheaders``; the
    # earlier ``add_headers`` spelling only created an unused attribute.
    opener.addheaders = [
        ('User-agent', 'Mozilla/5.0 (compatible; imgur-album-downloader)'),
    ]
    urllib2.install_opener(opener)
    # Building a Request does not touch the network, so the former try/except
    # around it could never fire; failures surface from open() below.
    return opener.open(urllib2.Request(url))
def is_url(url):
    """Return True when *url* points at imgur.com or one of its subdomains.

    The host is compared as a whole (case-insensitively) instead of with a
    substring test, so look-alike hosts such as ``notimgur.com`` or
    ``imgur.com.evil.example`` are rejected.  URLs without a network
    location (for example a bare path) yield False.
    """
    # ``hostname`` is lower-cased and stripped of port/credentials; it is
    # None when the URL has no network location.
    host = urlparse(url).hostname
    if not host:
        return False
    return host == 'imgur.com' or host.endswith('.imgur.com')
def fetch(url):
    """Download the no-script HTML page of the album that *url* refers to.

    The album key is the third ``/``-separated piece of the URL path, so
    ``/a/<key>`` gives ``<key>``.

    Returns a ``(html, key)`` tuple.  Raises ``IndexError`` when the path has
    no third piece; network errors from ``get`` propagate unchanged.
    """
    key = urlparse(url).path.split('/')[2]
    response = get('https://imgur.com/a/%s/noscript' % key)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    return html, key
def get_or_create_folder(key, folder=None):
    """Return the download directory, creating it first when it is missing.

    The directory is *folder* when one is given, otherwise *key* is used as
    the directory name.
    """
    target = key if folder is None else folder
    if os.path.exists(target):
        return target
    os.makedirs(target)
    return target
def fetch_images(foldername, images):
    """Download a single image into *foldername*.

    *images* is one match tuple as returned by ``save``: index 0 holds the
    image URL and index 1 the file name to store it under.  Network errors
    from ``get`` propagate to the caller.
    """
    # Tiny random pause (0 or 0.0001 s) before starting the download.
    gevent.sleep(random.randint(0, 1) * 0.0001)
    url, filename = images[0], images[1]
    response = get(url)
    try:
        data = response.read()
    finally:
        response.close()
    # Open the target only after the download succeeded, so a failed request
    # does not leave an empty file behind.
    with open(os.path.join(foldername, filename), 'wb') as img:
        img.write(data)
    print('Done:\t%s' % url)
def save(url, folder=None):
    """Collect the image links of an album and prepare its download folder.

    Fetches the album page for *url*, extracts every matching ``<img>``
    source and makes sure the destination directory exists (*folder* if
    given, otherwise the album key).

    Returns ``(foldername, images)`` where each entry of *images* is a
    ``(full_url, file_name, extension)`` tuple.
    """
    html, album_key = fetch(url)
    pattern = r'<img src="(http\:\/\/i\.imgur\.com\/([a-zA-Z0-9]{5}\.(jpg|png|gif)))"'
    matches = re.findall(pattern, html)
    target = get_or_create_folder(album_key, folder)
    return target, matches
def main(argv=None):
    """Command-line entry point: ``script ALBUM_URL [FOLDER]``.

    *argv* defaults to ``sys.argv``.  Downloads every image of the album
    concurrently, one greenlet per image, into FOLDER (or a directory named
    after the album key when FOLDER is omitted).

    Returns the process exit status: 0 on completion, 2 when the album URL
    argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # Previously a missing argument ended in a bare IndexError traceback.
        prog = argv[0] if argv else 'python <script>'
        sys.stderr.write('Usage: %s ALBUM_URL [FOLDER]\n' % prog)
        return 2
    url = argv[1]
    folder = argv[2] if len(argv) > 2 else None
    foldername, images = save(url, folder=folder)
    threads = [gevent.spawn(fetch_images, foldername, image) for image in images]
    gevent.joinall(threads)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment