Skip to content

Instantly share code, notes, and snippets.

@kisom
Created December 2, 2011 08:51
Show Gist options
  • Save kisom/1422395 to your computer and use it in GitHub Desktop.
Save kisom/1422395 to your computer and use it in GitHub Desktop.
imgur album downloader
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# author: kyle isom <[email protected]>
# license: isc / public domain dual-license
# see (http://www.brokenlcd.net/standard-license.txt)
#
"""
This is a module (with a main() function) to grab a list of image links in
an imgur album. The main() function demonstrates how to use that to
download an album to a directory.
"""
# PEP8 status:
# one warning still exists, namely
# imgur.py:61:36: W601 .has_key() is deprecated, use 'in'
# this is annotated in ImgurAlbum.__image_generator but the reason
# is that the BeautifulSoup nodes don't support the in keyword.
# pylint status:
# several warnings still exist, namely
# imgur.py:48: [E, ImgurAlbum.__init__] Instance of 'Request' has no 'content'
# member (but some types could not be inferred)
# pylint doesn't realise the Request object has a content method
# imgur.py:24: [R, ImgurAlbum] Too few public methods (0/2)
# the ImgurAlbum has as many methods as are applicable
import argparse
import os
# external dependencies
import BeautifulSoup
import requests
class ImgurAlbum(object):
    """
    An abstraction of an imgur album. Provides access to the album's title
    and the list of image links. It is initialised with the url to the
    album:

        album = ImgurAlbum('http://www.imgur.com/a/0ruA9')

    The list of links is grabbed from album.image_generator, which is a
    generator object and is therefore exhausted after one pass. Because
    the factory method's name is mangled by Python, code outside the
    class resets the generator with:

        album.image_generator = album._ImgurAlbum__image_generator()
    """

    # Trailing text removed from the page title to obtain the album title.
    _TITLE_SUFFIX = ' - Imgur'

    url = None      # album url passed to the constructor
    title = None    # page title with the trailing ' - Imgur' removed
    req = None      # response returned by requests.get() for the album page
    soup = None     # BeautifulSoup parse tree of the album page

    def __init__(self, target):
        """
        The album must be initialised with the url to the album. The
        class fetches the page, parses it, extracts the title and
        prepares the image link generator.
        """
        # Debug trace kept from the original code; the handle is now
        # closed deterministically instead of being left to the GC.
        with open('/tmp/imgur.txt', 'a') as log:
            log.write('received target %s\n' % (target, ))

        self.url = target
        self.req = requests.get(target)
        self.soup = BeautifulSoup.BeautifulSoup(self.req.content)
        self.title = self._clean_title(self.soup.find('title').contents[0])
        self.image_generator = self.__image_generator()

    @staticmethod
    def _clean_title(raw_title):
        """
        Return raw_title without surrounding whitespace and without a
        trailing ' - Imgur'.

        str.strip(' - Imgur') must not be used for this: its argument is
        a *set of characters*, so it would also eat leading and trailing
        letters of the real title (e.g. 'Images' would become 'ages').
        """
        title = raw_title.strip()
        suffix = ImgurAlbum._TITLE_SUFFIX
        if title.endswith(suffix):
            title = title[:-len(suffix)]
        return title.strip()

    def __image_generator(self):
        """
        This is an internal function to initialise the link generator.
        It yields the 'data-src' attribute of every <img> node whose
        'class' attribute is exactly 'unloaded'.
        """
        # has_key is deprecated, but the nodes don't act like proper
        # dictionaries - 'class' in img won't work, for example.
        return (img['data-src'] for img in self.soup.findAll('img')
                if img.has_key('class') and
                img.has_key('data-src') and
                'unloaded' == img['class'])
def main(target, verbose=True):
    """
    Main code that takes a url to an imgur album and downloads it
    to a directory (named by the album's title), created relative to
    the current working directory if it does not exist yet.

    target  -- url of the imgur album to download
    verbose -- when true, print the destination path of every image
               before it is fetched
    """
    album = ImgurAlbum(target)

    # NOTE(review): the directory name comes straight from the page
    # title; a title containing a path separator yields nested dirs.
    if not os.path.isdir(album.title):
        os.makedirs(album.title)

    for link in album.image_generator:
        filename = os.path.join(album.title, os.path.basename(link))
        if verbose:
            # Single-argument call form prints identically on Python 2
            # and Python 3.
            print('processing: %s' % (filename, ))
        image = requests.get(link)
        # Image data is binary: write it in 'wb' mode so no newline
        # translation corrupts it, and close the file promptly.
        with open(filename, 'wb') as image_file:
            image_file.write(image.content)
if __name__ == '__main__':
    # Command line entry point: parse the arguments, optionally switch
    # into the requested top level directory, and download the album.
    DESC = 'script to download an imgur album'
    PARSER = argparse.ArgumentParser(description=DESC)
    PARSER.add_argument(
        '-d', '--directory',
        action='store',
        help='top level directory to store album in',
    )
    PARSER.add_argument('target', help='album to download')
    ARGS = PARSER.parse_args()

    if ARGS.directory:
        # Remember where we started so the working directory can be
        # put back once the download has finished.
        PUSHDIR = os.getcwd()
        os.chdir(ARGS.directory)
        main(ARGS.target)
        os.chdir(PUSHDIR)
    else:
        main(ARGS.target)
@kisom
Copy link
Author

kisom commented Jan 22, 2012

Glad to hear!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment