Created
December 19, 2011 18:45
-
-
Save cstroie/1498354 to your computer and use it in GitHub Desktop.
Download all the photos of a PicasaWeb album at specified resolution
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#
# picasaweb_downloader.py
#
# Copyright 2011 Costin STROIE <[email protected]>
#
# This file is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this file.  If not, see <http://www.gnu.org/licenses/>.
#
""" Download all the photos of a PicasaWeb album at specified resolution """
# Standard-library modules (Python 2)
import os
import sys
import urllib
import urlparse
import Queue
import threading
# Third-party XML toolkit
from lxml import etree as ET

# Shared work queue feeding the downloader threads
queue = Queue.Queue()
class DownloadPhoto(threading.Thread): | |
""" Threaded photo downloader """ | |
def __init__(self, queue, output_dir): | |
threading.Thread.__init__(self) | |
self.queue = queue | |
self.output_dir = output_dir | |
def run(self): | |
while True: | |
# Get one URL from the queue | |
url, width, height = self.queue.get() | |
# Download the image | |
photo_url = urlparse.urlparse(url) | |
photo_dir, photo_name = photo_url.path.rsplit('/', 1) | |
file_path = os.path.join(self.output_dir, photo_name) | |
print 'INFO: Downloading "%s" size %sx%s' % (photo_name, width, height) | |
try: | |
urllib.urlretrieve(url, file_path) | |
except: | |
return | |
# Signal the queue the job is done | |
self.queue.task_done() | |
def download_album(rss_url, size = 'd'): | |
""" Download the album """ | |
# Adjust the querystring | |
album_url = urlparse.urlparse(rss_url) | |
qs = urlparse.parse_qs(album_url.query) | |
qs['imgmax'] = size | |
qs['max-results'] = 10000 | |
adjusted_url = urlparse.urlunparse((album_url.scheme, | |
album_url.netloc, | |
album_url.path, | |
album_url.params, | |
urllib.urlencode(qs, True), | |
album_url.fragment)) | |
print 'INFO: URL %s' % adjusted_url | |
# Try to open the rss feed | |
try: | |
rss_feed = urllib.urlopen(adjusted_url) | |
except: | |
print 'ERROR: Can not open the RSS url.' | |
return 1 | |
# Try to parse the rss feed | |
try: | |
rss = ET.parse(rss_feed) | |
except: | |
print 'ERROR: Can not parse the RSS feed.' | |
return 2 | |
# Get the user | |
try: | |
user = rss.xpath('/rss/channel/managingEditor')[0].text | |
except: | |
print 'ERROR: Can not get the user.' | |
return 3 | |
# Get the title | |
try: | |
title = rss.xpath('/rss/channel/title')[0].text | |
except: | |
print 'ERROR: Can not get the album title.' | |
return 4 | |
# Create the output directory | |
output_dir = os.path.join(user, title) | |
if not os.path.isdir(output_dir): | |
os.makedirs(output_dir) | |
# Spawn a pool of threads and pass them queue instance | |
for i in range(4): | |
t = DownloadPhoto(queue, output_dir) | |
t.setDaemon(True) | |
t.start() | |
# Populate the queue with photos url and size | |
for item in rss.xpath('/rss/channel/item/media:group/media:content', | |
namespaces = {'media': 'http://search.yahoo.com/mrss/'}): | |
queue.put((item.get('url'), item.get('width'), item.get('height'))) | |
# Wait on the queue until everything has been processed | |
queue.join() | |
if __name__ == '__main__': | |
RSS_URL = sys.argv[1] | |
# Download | |
sys.exit(download_album(RSS_URL, size = 'd')) | |
# vim: set ft=python ai ts=4 sts=4 et sw=4 sta nowrap nu : |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment