Download all the photos of a PicasaWeb album at specified resolution
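The script takes the album's RSS feed URL as its only command-line argument and saves every photo into a ./user/album-title/ directory. For a public album, the feed URL usually follows the pattern below (per the PicasaWeb Data API of the time; USER and ALBUMID are placeholders, so adjust to the feed link your album page offers):

    python picasaweb_downloader.py 'http://picasaweb.google.com/data/feed/base/user/USER/albumid/ALBUMID?alt=rss&kind=photo'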
#!/usr/bin/env python
#
# picasaweb_downloader.py
#
# Copyright 2011 Costin STROIE <[email protected]>
#
# This file is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this file. If not, see <http://www.gnu.org/licenses/>.
#
""" Download all the photos of a PicasaWeb album at specified resolution """
# Import the required modules
import sys, os, urllib, urlparse
from lxml import etree as ET
import Queue, threading
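# NOTE: this script targets Python 2; under Python 3 the Queue module is
# named queue, and urllib/urlparse were reorganized into urllib.request
# and urllib.parse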
# The queue
queue = Queue.Queue()
class DownloadPhoto(threading.Thread):
    """ Threaded photo downloader """
    def __init__(self, queue, output_dir):
        threading.Thread.__init__(self)
        self.queue = queue
        self.output_dir = output_dir

    def run(self):
        while True:
            # Get one URL from the queue
            url, width, height = self.queue.get()
            # Derive the local file name from the URL path
            photo_url = urlparse.urlparse(url)
            photo_dir, photo_name = photo_url.path.rsplit('/', 1)
            file_path = os.path.join(self.output_dir, photo_name)
            print 'INFO: Downloading "%s" size %sx%s' % (photo_name, width, height)
            # Download the image
            try:
                urllib.urlretrieve(url, file_path)
            except IOError:
                print 'ERROR: Can not download "%s".' % photo_name
            # Signal the queue the job is done; skipping this on a failed
            # download would leave queue.join() waiting forever
            self.queue.task_done()
def download_album(rss_url, size = 'd'):
    """ Download the album """
    # Adjust the querystring: imgmax selects the photo size ('d' means the
    # original resolution), max-results lifts the default item limit
    album_url = urlparse.urlparse(rss_url)
    qs = urlparse.parse_qs(album_url.query)
    qs['imgmax'] = size
    qs['max-results'] = 10000
    adjusted_url = urlparse.urlunparse((album_url.scheme,
                                        album_url.netloc,
                                        album_url.path,
                                        album_url.params,
                                        urllib.urlencode(qs, True),
                                        album_url.fragment))
    print 'INFO: URL %s' % adjusted_url
    # Try to open the RSS feed
    try:
        rss_feed = urllib.urlopen(adjusted_url)
    except IOError:
        print 'ERROR: Can not open the RSS url.'
        return 1
    # Try to parse the RSS feed
    try:
        rss = ET.parse(rss_feed)
    except ET.XMLSyntaxError:
        print 'ERROR: Can not parse the RSS feed.'
        return 2
    # Get the user
    try:
        user = rss.xpath('/rss/channel/managingEditor')[0].text
    except IndexError:
        print 'ERROR: Can not get the user.'
        return 3
    # Get the album title
    try:
        title = rss.xpath('/rss/channel/title')[0].text
    except IndexError:
        print 'ERROR: Can not get the album title.'
        return 4
    # Create the output directory
    output_dir = os.path.join(user, title)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    # Spawn a pool of worker threads and pass them the queue instance
    for i in range(4):
        t = DownloadPhoto(queue, output_dir)
        t.setDaemon(True)
        t.start()
    # Populate the queue with each photo's URL and size
    for item in rss.xpath('/rss/channel/item/media:group/media:content',
                          namespaces = {'media': 'http://search.yahoo.com/mrss/'}):
        queue.put((item.get('url'), item.get('width'), item.get('height')))
    # Wait on the queue until everything has been processed
    queue.join()
if __name__ == '__main__':
    # Expect the album RSS feed URL as the only argument
    if len(sys.argv) < 2:
        sys.exit('Usage: %s rss_url' % sys.argv[0])
    RSS_URL = sys.argv[1]
    # Download at original resolution
    sys.exit(download_album(RSS_URL, size = 'd'))
# vim: set ft=python ai ts=4 sts=4 et sw=4 sta nowrap nu :
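A minimal programmatic usage sketch, assuming the script is saved as picasaweb_downloader.py; the album URL is a placeholder, and per the PicasaWeb Data API the imgmax parameter accepted 'd' for the original resolution or a fixed pixel size such as '1024':

    from picasaweb_downloader import download_album

    # Request 1024-pixel copies instead of the originals (hypothetical album)
    download_album('http://picasaweb.google.com/data/feed/base/'
                   'user/USER/albumid/ALBUMID?alt=rss&kind=photo',
                   size = '1024')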