Skip to content

Instantly share code, notes, and snippets.

@chesster
Created December 30, 2013 12:18
Show Gist options
  • Save chesster/8181471 to your computer and use it in GitHub Desktop.
Save chesster/8181471 to your computer and use it in GitHub Desktop.
Download all Dilbert strips to a directory or display a link
#!/usr/bin/env python
from __future__ import print_function
import urllib
import sys
import os
from datetime import date, timedelta, datetime
from BeautifulSoup import BeautifulSoup
class DilbertDownload(object):
    """Download Dilbert strips from dilbert.com into a local directory.

    For a given day the strip's page is fetched and parsed with
    BeautifulSoup to locate the strip image; the image is then saved as
    ``<path>/<YYYY-MM-DD><ext>``.
    """

    # ``alt`` text of the <img> tag that carries the strip on a strip page.
    STRIP_IMG_ALT = ('The Official Dilbert Website featuring Scott Adams '
                     'Dilbert strips, animations and more')

    def __init__(self, path='.'):
        """Set up URLs and the date range, and validate the target directory.

        Args:
            path: Existing directory the strips are saved into.

        Raises:
            Exception: If ``path`` is not an existing directory.
        """
        self.base_url = "http://www.dilbert.com"
        # The doubled %% leaves one "%s" placeholder for the date string.
        self.url = "%s/strips/comic/%%s/" % self.base_url
        self.date_format = "%Y-%m-%d"
        self.start_date = datetime.strptime('1989-04-16', self.date_format)
        if not os.path.isdir(path):
            raise Exception('Directory "%s" does not exist' % path)
        self.path = path

    def get_strip(self, url):
        """Return the absolute image URL found on the strip page at ``url``.

        Prints an ``[ERROR]`` line and returns ``None`` when the page holds
        no matching image. Errors raised while fetching the page propagate
        to the caller.
        """
        response = urllib.urlopen(url)
        try:
            html = response.read()
        finally:
            # Close explicitly: download_all() fetches one page per day.
            response.close()

        img = BeautifulSoup(html).find('img', attrs={'alt': self.STRIP_IMG_ALT})
        if img is None:
            print("[ERROR]: no strip image found at %s" % url)
            return None
        try:
            return self.base_url + img['src']
        except KeyError:
            print("[ERROR]: strip image at %s has no src attribute" % url)
            return None

    def get_strip_url(self, date):
        """Return the image URL of the strip for ``date``, or ``None``."""
        date_string = date.strftime(self.date_format)
        return self.get_strip(self.url % date_string)

    def _target_path(self, date_string, img_url):
        """Build the local file path for a strip image.

        The extension is taken from the URL with any query string removed,
        so ``.../1.strip.gif?v=2`` yields ``<path>/<date_string>.gif``. A URL
        without an extension yields a file name without one.
        """
        extension = os.path.splitext(img_url.split('?', 1)[0])[1]
        return os.path.join(self.path, date_string + extension)

    def download_strip(self, date):
        """Download the strip for ``date`` into ``self.path``.

        Prints progress to stdout. When no image URL can be determined the
        day is reported as ``[SKIPPED]`` instead of raising.
        """
        date_string = date.strftime(self.date_format)
        print(date_string, end="...")
        img_url = self.get_strip_url(date)
        if img_url is None:
            # get_strip() already printed the reason.
            print("[SKIPPED]")
            return
        urllib.urlretrieve(img_url, self._target_path(date_string, img_url))
        print("[DONE]")

    def download_all(self):
        """Download every strip from today back to ``self.start_date``."""
        current = datetime.today()
        # ">=" keeps the first strip's day included even when the current
        # time of day is exactly midnight.
        while current >= self.start_date:
            self.download_strip(current)
            current -= timedelta(days=1)

    def get_todays_url(self):
        """Return the image URL of today's strip, or ``None``."""
        return self.get_strip_url(datetime.today())
if __name__ == "__main__":
    # Command-line entry point: by default download every strip into the
    # chosen directory; with -t/--today only print the link to today's strip.
    from optparse import OptionParser

    cli = OptionParser()
    cli.add_option("-d", "--directory", dest="directory", help="download directory")
    cli.add_option("-t", "--today", dest="todayslink", action="store_true",
                   default=False, help="Get todays link")
    options, _ = cli.parse_args()

    if options.todayslink:
        # Link-only mode ignores --directory and uses the default path.
        print(DilbertDownload().get_todays_url())
    else:
        if options.directory:
            downloader = DilbertDownload(options.directory)
        else:
            downloader = DilbertDownload()
        downloader.download_all()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment