-
-
Save ianchesal/1072110 to your computer and use it in GitHub Desktop.
Download simpledesktops.com wallpaper images
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
__doc__ = """ | |
simpledesktops_download.py | |
A quick script that fetches desktop images from the http://simpledesktops.com/ site. | |
It will start at the most current list of images and keep moving backwards, downloading | |
desktop images in to a directory on your local machine, until it finds an image that | |
already exists on disk. At that point it will stop. | |
Image names are prefixed with the yyyy-mm-dd string from the URL so images with | |
the same names, but published on different dates, don't collide on disk. | |
This lets you run the script infrequently and have it fill in the missing images on | |
your disk, stopping when it gets to the point where you last downloaded images from | |
the site. | |
- Please be gentle when downloading files. | |
- Browse around their site to give them support! | |
Future Work: | |
------------ | |
- TODO Improve the way the URI for the image is devined from the source, make less fragile | |
- TODO Switch to using their RSS feed: http://stackoverflow.com/questions/5722963/python-rss-parser-that-also-handles-feedburner | |
- TODO See the feed at: http://feeds.feedburner.com/simpledesktops | |
Created by Will Nowack ([email protected]) | |
Additional contributions by Ian Chesal ([email protected]) | |
Requirements: | |
------------- | |
BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/#Download | |
pip install beautifulsoup4 | |
Usage: | |
------ | |
$ simpledesktops_download.py | |
Downloading images from: http://simpledesktops.com/browse | |
Saving images in: /Users/ian/Pictures/Desktops/simpledesktops | |
Downloading http://static.simpledesktops.com/uploads/desktops/2013/05/25/spining2.png --> /Users/ian/Pictures/Desktops/simpledesktops/2013-05-25-spining2.png | |
Use --help for more information on options. | |
""" | |
__author__ = 'Will Nowak <[email protected]>' | |
__contributors__ = ('Ian Chesal <[email protected]>') | |
__version__ = '1.1.0' | |
import urllib | |
import urlparse | |
import os | |
import re | |
from bs4 import BeautifulSoup | |
import sys | |
import argparse | |
from time import sleep | |
def build_command_line_parser(): | |
parser = argparse.ArgumentParser(description='Download desktop images from simpledesktops.com') | |
parser.add_argument('--url', '-u', | |
dest='baseurl', | |
default='http://simpledesktops.com/browse', | |
help='base URL to search for new images from') | |
parser.add_argument('--path', '-p', | |
dest="path", | |
default=os.path.expanduser('~/Pictures/Desktops/simpledesktops'), | |
help="directory where images should be stored") | |
parser.add_argument('--offset', '-o', | |
dest='offset', | |
type=int, | |
default=1, | |
help='starting offset value for browsing the picture history') | |
parser.add_argument('--timeout', '-t', | |
dest='timeout', | |
type=float, | |
default=5.0, | |
help='time to wait in seconds between downloads') | |
return parser | |
def form_file_name(urlpath): | |
''' | |
Parse a simpledesktops.com URL and turn it in to a file name that | |
takes the form yyyy-mm-dd-filename so we can handle repeating file | |
names that seem to occur over time with the site. | |
''' | |
# The parsed URLs take the form: | |
# /desktops/2011/07/07/Safari_Desktop_Picture.png | |
filename = os.path.basename(urlpath) | |
matches = re.match('.*/(\d{4})/(\d{2})/(\d{2})/%s$' % filename, urlpath) | |
if matches: | |
filename = '%s-%s-%s-%s' % (matches.group(1), matches.group(2), matches.group(3), filename) | |
else: | |
print 'Error: Cannot parse date from %s' % urlpath | |
return filename | |
def main(): | |
parser = build_command_line_parser() | |
options = parser.parse_args() | |
# If the dowload directory doesn't exist: error | |
if not os.path.isdir(options.path): | |
print 'Error: Download destination %s does not exist' % options.path | |
return 1 | |
if not options.timeout > 1: | |
print 'Error: --timeout value must be > 1 second -- play nice!' | |
return 1 | |
print "Downloading images from: %s" % options.baseurl | |
print "Saving images in: %s" % options.path | |
for i in range(options.offset, 1000): | |
# Browse backwards until we find a file that already exists | |
# on disk and then stop browsing. This lets us run this script | |
# periodically to "catch up" to the last time we downloaded | |
# images from simpledesktops.com... | |
stop_parsing = False | |
url = '%s/%s/' % (options.baseurl, i) | |
b = BeautifulSoup(urllib.urlopen(url).read()) | |
for x in b.findAll(attrs={'class': 'desktop'}): | |
uri = x.find('img')['src'] | |
# Image links are in the form: http://static.simpledesktops.com/desktops/2011/07/07/Safari_Desktop_Picture.png.295x184_q100.png | |
# Need to drop the .295x184_q100.png from the end to get the full size version of the image | |
if uri.endswith('.295x184_q100.png'): | |
uri = uri.replace('.295x184_q100.png', '') | |
else: | |
print 'Error: Encountered an img URI I do not understand: %s' % uri | |
return 1 | |
file_name = form_file_name(urlparse.urlparse(uri).path) | |
f = os.path.join(options.path, file_name) | |
if os.path.isfile(f): | |
print 'Found existing image %s -- halting downloads' % f | |
stop_parsing = True | |
break | |
print '[%04d] Downloading %s --> %s' % (i, uri, f) | |
urllib.urlretrieve(uri, f) | |
sleep(options.timeout) | |
if stop_parsing: | |
break | |
print 'All done!' | |
return 0 | |
if __name__ == '__main__': | |
sys.exit(main()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment