Skip to content

Instantly share code, notes, and snippets.

@jcarbaugh
Created August 9, 2011 15:05
Show Gist options
  • Save jcarbaugh/1134300 to your computer and use it in GitHub Desktop.
Save jcarbaugh/1134300 to your computer and use it in GitHub Desktop.
Get a screenshot for each challenge on challenge.gov
from lxml.html import parse
import re
import subprocess
import urllib2
# Runs of these ASCII whitespace/punctuation characters separate slug words.
_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """Return a lower-cased, ASCII-only slug built from *text*.

    The text is lower-cased and split on runs of the characters matched by
    ``_punct_re``. Each piece is reduced to ASCII, empty pieces are dropped,
    and the remainder is joined with *delim*.

    Accented characters are decomposed (NFKD) before the ASCII reduction so
    the base letter survives ("café" -> "cafe") instead of being removed.
    Byte strings are accepted and decoded as ASCII, ignoring undecodable
    bytes.

    Args:
        text: The title to convert (text or byte string).
        delim: Separator placed between the words of the slug.

    Returns:
        The slug as a text string; empty if *text* contains no usable
        characters.
    """
    # Imported locally so the block does not depend on the file's import list.
    import unicodedata

    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3.
    text_type = type(u'')

    if isinstance(text, bytes):
        text = text.decode('ascii', 'ignore')

    words = []
    for word in _punct_re.split(text.lower()):
        # NFKD splits "é" into "e" + combining accent; the ASCII encode then
        # discards only the accent rather than the whole character.
        decomposed = unicodedata.normalize('NFKD', word)
        ascii_word = decomposed.encode('ascii', 'ignore').decode('ascii')
        if ascii_word:
            words.append(ascii_word)
    return text_type(delim.join(words))
# Search-results URL template; ``%i`` is filled with the results page number.
LIST_URL = "http://challenge.gov/search?page=%i&type=challenges"
# Number of result pages to walk (pages 1..PAGE_COUNT inclusive).
PAGE_COUNT = 23

# For every results page, find each challenge link and hand its URL to the
# external ``webkit2png`` tool, using a slug of the link text as the file name.
for i in range(1, PAGE_COUNT + 1):
    res = urllib2.urlopen(LIST_URL % i)
    try:
        root = parse(res).getroot()
    finally:
        # Release the HTTP response even when parsing raises.
        res.close()

    if root is None:
        # Nothing parseable came back for this page; move on to the next.
        continue

    for a in root.cssselect(".info h2 a"):
        href = a.get("href")
        if not href:
            # An anchor without a target gives webkit2png nothing to load.
            continue
        # NOTE(review): href is passed through unchanged; if the site emits
        # relative links they would need resolving against the page URL.
        filename = "%s.jpg" % slugify(a.text_content())
        subprocess.call(['webkit2png', '-T', '-o', filename, href])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment