Download simpledesktops.com wallpaper images
#!/usr/bin/python
__doc__ = """
simpledesktops_download.py
A quick script that fetches desktop images from the http://simpledesktops.com/ site.
It will start at the most recent page of images and keep moving backwards, downloading
desktop images into a directory on your local machine, until it finds an image that
already exists on disk. At that point it will stop.
Image names are prefixed with the yyyy-mm-dd string from the URL so images with
the same names, but published on different dates, don't collide on disk.
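For example, an image published at /desktops/2011/07/07/Safari_Desktop_Picture.png
is saved as 2011-07-07-Safari_Desktop_Picture.png.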
This lets you run the script infrequently and have it fill in the missing images on
your disk, stopping when it gets to the point where you last downloaded images from
the site.
- Please be gentle when downloading files.
- Browse around their site to give them support!
Future Work:
------------
- TODO Improve the way the URI for the image is divined from the source; make it less fragile
- TODO Switch to using their RSS feed: http://stackoverflow.com/questions/5722963/python-rss-parser-that-also-handles-feedburner
- TODO See the feed at: http://feeds.feedburner.com/simpledesktops
Created by Will Nowack ([email protected])
Additional contributions by Ian Chesal ([email protected])
Requirements:
-------------
BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/#Download
pip install beautifulsoup4
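Python 2.7 (the script uses the print statement, urllib/urlparse and the
standard-library argparse module)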
Usage:
------
$ simpledesktops_download.py
Downloading images from: http://simpledesktops.com/browse
Saving images in: /Users/ian/Pictures/Desktops/simpledesktops
Downloading http://static.simpledesktops.com/uploads/desktops/2013/05/25/spining2.png --> /Users/ian/Pictures/Desktops/simpledesktops/2013-05-25-spining2.png
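Options can be combined, for example (the destination path here is only illustrative):
$ simpledesktops_download.py --path ~/Wallpapers --timeout 10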
Use --help for more information on options.
"""
__author__ = 'Will Nowak <[email protected]>'
__contributors__ = ('Ian Chesal <[email protected]>',)
__version__ = '1.1.0'
import urllib
import urlparse
import os
import re
from bs4 import BeautifulSoup
import sys
import argparse
from time import sleep
def build_command_line_parser():
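    '''Build the argparse parser for this script's command-line options.'''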
    parser = argparse.ArgumentParser(description='Download desktop images from simpledesktops.com')
    parser.add_argument('--url', '-u',
                        dest='baseurl',
                        default='http://simpledesktops.com/browse',
                        help='base URL to search for new images from')
    parser.add_argument('--path', '-p',
                        dest='path',
                        default=os.path.expanduser('~/Pictures/Desktops/simpledesktops'),
                        help='directory where images should be stored')
    parser.add_argument('--offset', '-o',
                        dest='offset',
                        type=int,
                        default=1,
                        help='starting offset value for browsing the picture history')
    parser.add_argument('--timeout', '-t',
                        dest='timeout',
                        type=float,
                        default=5.0,
                        help='time to wait in seconds between downloads')
    return parser
def form_file_name(urlpath):
    '''
    Parse a simpledesktops.com URL path and turn it into a file name that
    takes the form yyyy-mm-dd-filename so we can handle repeating file
    names that seem to occur over time on the site.
    '''
    # The parsed URL paths take the form:
    # /desktops/2011/07/07/Safari_Desktop_Picture.png
    filename = os.path.basename(urlpath)
    matches = re.match(r'.*/(\d{4})/(\d{2})/(\d{2})/%s$' % re.escape(filename), urlpath)
    if matches:
        filename = '%s-%s-%s-%s' % (matches.group(1), matches.group(2), matches.group(3), filename)
    else:
        print 'Error: Cannot parse date from %s' % urlpath
    return filename
def main():
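    '''Parse the options, then walk the browse pages and download any images not already on disk.'''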
    parser = build_command_line_parser()
    options = parser.parse_args()
    # If the download directory doesn't exist: error
    if not os.path.isdir(options.path):
        print 'Error: Download destination %s does not exist' % options.path
        return 1
    if options.timeout <= 1:
        print 'Error: --timeout value must be > 1 second -- play nice!'
        return 1
    print "Downloading images from: %s" % options.baseurl
    print "Saving images in: %s" % options.path
    for i in range(options.offset, 1000):
        # Browse backwards until we find a file that already exists
        # on disk and then stop browsing. This lets us run this script
        # periodically to "catch up" to the last time we downloaded
        # images from simpledesktops.com...
        stop_parsing = False
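        # Browse pages are paginated as <baseurl>/<n>/, so this fetches, for
        # example, http://simpledesktops.com/browse/2/ for the second page.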
        url = '%s/%s/' % (options.baseurl, i)
        b = BeautifulSoup(urllib.urlopen(url).read(), 'html.parser')
        for x in b.find_all(attrs={'class': 'desktop'}):
            uri = x.find('img')['src']
            # Image links are in the form: http://static.simpledesktops.com/desktops/2011/07/07/Safari_Desktop_Picture.png.295x184_q100.png
            # Need to drop the .295x184_q100.png from the end to get the full-size version of the image
            if uri.endswith('.295x184_q100.png'):
                uri = uri.replace('.295x184_q100.png', '')
            else:
                print 'Error: Encountered an img URI I do not understand: %s' % uri
                return 1
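            # Prefix the file's basename with the yyyy-mm-dd pulled from the
            # URL path (see form_file_name) so repeated names don't collide.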
            file_name = form_file_name(urlparse.urlparse(uri).path)
            f = os.path.join(options.path, file_name)
            if os.path.isfile(f):
                print 'Found existing image %s -- halting downloads' % f
                stop_parsing = True
                break
            print '[%04d] Downloading %s --> %s' % (i, uri, f)
            urllib.urlretrieve(uri, f)
            sleep(options.timeout)
        if stop_parsing:
            break
    print 'All done!'
    return 0
if __name__ == '__main__':
    sys.exit(main())