Download all the photos of a PicasaWeb album at specified resolution
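The script takes the album's RSS feed URL as its only command-line argument and saves every photo into a ./user/album-title/ directory. For a public album, the feed URL usually follows the pattern below (per the PicasaWeb Data API of the time; USER and ALBUMID are placeholders, so adjust to the feed link your album page offers):

    python picasaweb_downloader.py 'http://picasaweb.google.com/data/feed/base/user/USER/albumid/ALBUMID?alt=rss&kind=photo'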
#!/usr/bin/env python
#
# picasaweb_downloader.py
#
# Copyright 2011 Costin STROIE <[email protected]>
#
# This file is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this file. If not, see <http://www.gnu.org/licenses/>.
#
""" Download all the photos of a PicasaWeb album at specified resolution """
# Import the required modules
import sys, os, urllib, urlparse
from lxml import etree as ET
import Queue, threading
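# NOTE: this script targets Python 2; under Python 3 the Queue module is
# named queue, and urllib/urlparse were reorganized into urllib.request
# and urllib.parse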
# The queue
queue = Queue.Queue()
class DownloadPhoto(threading.Thread):
    """ Threaded photo downloader """
    def __init__(self, queue, output_dir):
        threading.Thread.__init__(self)
        self.queue = queue
        self.output_dir = output_dir

    def run(self):
        while True:
            # Get one URL from the queue
            url, width, height = self.queue.get()
            # Derive the local file name from the URL path
            photo_url = urlparse.urlparse(url)
            photo_dir, photo_name = photo_url.path.rsplit('/', 1)
            file_path = os.path.join(self.output_dir, photo_name)
            print 'INFO: Downloading "%s" size %sx%s' % (photo_name, width, height)
            # Download the image
            try:
                urllib.urlretrieve(url, file_path)
            except IOError:
                print 'ERROR: Can not download "%s".' % photo_name
            # Signal the queue the job is done; skipping this on a failed
            # download would leave queue.join() waiting forever
            self.queue.task_done()
def download_album(rss_url, size = 'd'):
    """ Download the album """
    # Adjust the querystring: imgmax selects the photo size ('d' means the
    # original resolution), max-results lifts the default item limit
    album_url = urlparse.urlparse(rss_url)
    qs = urlparse.parse_qs(album_url.query)
    qs['imgmax'] = size
    qs['max-results'] = 10000
    adjusted_url = urlparse.urlunparse((album_url.scheme,
                                        album_url.netloc,
                                        album_url.path,
                                        album_url.params,
                                        urllib.urlencode(qs, True),
                                        album_url.fragment))
    print 'INFO: URL %s' % adjusted_url
    # Try to open the RSS feed
    try:
        rss_feed = urllib.urlopen(adjusted_url)
    except IOError:
        print 'ERROR: Can not open the RSS url.'
        return 1
    # Try to parse the RSS feed
    try:
        rss = ET.parse(rss_feed)
    except ET.XMLSyntaxError:
        print 'ERROR: Can not parse the RSS feed.'
        return 2
    # Get the user
    try:
        user = rss.xpath('/rss/channel/managingEditor')[0].text
    except IndexError:
        print 'ERROR: Can not get the user.'
        return 3
    # Get the album title
    try:
        title = rss.xpath('/rss/channel/title')[0].text
    except IndexError:
        print 'ERROR: Can not get the album title.'
        return 4
    # Create the output directory
    output_dir = os.path.join(user, title)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    # Spawn a pool of worker threads and pass them the queue instance
    for i in range(4):
        t = DownloadPhoto(queue, output_dir)
        t.setDaemon(True)
        t.start()
    # Populate the queue with each photo's URL and size
    for item in rss.xpath('/rss/channel/item/media:group/media:content',
                          namespaces = {'media': 'http://search.yahoo.com/mrss/'}):
        queue.put((item.get('url'), item.get('width'), item.get('height')))
    # Wait on the queue until everything has been processed
    queue.join()
if __name__ == '__main__':
    # Expect the album RSS feed URL as the only argument
    if len(sys.argv) < 2:
        sys.exit('Usage: %s rss_url' % sys.argv[0])
    RSS_URL = sys.argv[1]
    # Download at original resolution
    sys.exit(download_album(RSS_URL, size = 'd'))
# vim: set ft=python ai ts=4 sts=4 et sw=4 sta nowrap nu :
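A minimal programmatic usage sketch, assuming the script is saved as picasaweb_downloader.py; the album URL is a placeholder, and per the PicasaWeb Data API the imgmax parameter accepted 'd' for the original resolution or a fixed pixel size such as '1024':

    from picasaweb_downloader import download_album

    # Request 1024-pixel copies instead of the originals (hypothetical album)
    download_album('http://picasaweb.google.com/data/feed/base/'
                   'user/USER/albumid/ALBUMID?alt=rss&kind=photo',
                   size = '1024')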