jcarbaugh · August 9, 2011 15:05
diff --git a/challengegov.py b/challengegov.py
 from lxml.html import parse
 import re
 import subprocess
 import urllib2

 # Runs of these whitespace/punctuation characters separate the words of a slug.
 _punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')

 def slugify(text, delim=u'-'):
     """Return a lowercase, ASCII-only slug built from *text*.

     The text is lowercased and split on runs of the characters matched by
     ``_punct_re``; characters that cannot be encoded as ASCII are dropped
     from each word, words that end up empty are discarded, and the rest
     are joined with *delim*.

     text  -- text string to slugify; a byte string is also accepted, in
              which case its non-ASCII bytes are dropped instead of raising
              UnicodeDecodeError.
     delim -- separator placed between the words (default u'-').

     Returns a text string (``unicode`` on Python 2, ``str`` on Python 3).
     """
     if isinstance(text, bytes):
         # Byte strings would otherwise go through an implicit strict ASCII
         # decode inside .encode() and blow up on the first non-ASCII byte.
         text = text.decode('ascii', 'ignore')

     # type(u'') is `unicode` on Python 2 and `str` on Python 3, so the return
     # type stays textual without referencing a builtin that Python 3 lacks.
     text_type = type(u'')

     # 'ignore' is passed positionally: str.encode() only accepts keyword
     # arguments from Python 2.7 on.  Decoding back keeps every word a text
     # string so it can be joined with a text delimiter on Python 3 as well.
     words = [
         word.encode('ascii', 'ignore').decode('ascii')
         for word in _punct_re.split(text.lower())
     ]
     return text_type(delim.join(word for word in words if word))

 # Search-results listing; %i is replaced with the page number.
 LIST_URL = "http://challenge.gov/search?page=%i&type=challenges"

 # Fetch listing pages 1 through 23 and run webkit2png on every link found
 # under ".info h2 a", naming the output after the slugified link text.
 for i in range(1, 24):

     res = urllib2.urlopen(LIST_URL % i)
     try:
         root = parse(res).getroot()
     finally:
         # Close the response even when the page fails to parse.
         res.close()

     for a in root.cssselect(".info h2 a"):
         href = a.get("href")
         if href is None:
             # An anchor without an href has nothing to capture, and a None
             # argument would make subprocess.call() raise.
             continue
         filename = "%s.jpg" % slugify(a.text_content())
         subprocess.call(('webkit2png', '-T', '-o', filename, href))
	from lxml.html import parse
	import re
	import subprocess
	import urllib2

	# Runs of these whitespace/punctuation characters separate the words of a slug.
	_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{\|},.]+')

	def slugify(text, delim=u'-'):
	    """Return a lowercase, ASCII-only slug built from *text*.

	    The text is lowercased and split on runs of the characters matched by
	    ``_punct_re``; characters that cannot be encoded as ASCII are dropped
	    from each word, words that end up empty are discarded, and the rest
	    are joined with *delim*.

	    text  -- text string to slugify; a byte string is also accepted, in
	             which case its non-ASCII bytes are dropped instead of raising
	             UnicodeDecodeError.
	    delim -- separator placed between the words (default u'-').

	    Returns a text string (``unicode`` on Python 2, ``str`` on Python 3).
	    """
	    if isinstance(text, bytes):
	        # Byte strings would otherwise go through an implicit strict ASCII
	        # decode inside .encode() and blow up on the first non-ASCII byte.
	        text = text.decode('ascii', 'ignore')

	    # type(u'') is `unicode` on Python 2 and `str` on Python 3, so the
	    # return type stays textual without naming a builtin Python 3 lacks.
	    text_type = type(u'')

	    # 'ignore' is passed positionally: str.encode() only accepts keyword
	    # arguments from Python 2.7 on.  Decoding back keeps every word a text
	    # string so it can be joined with a text delimiter on Python 3 too.
	    words = [
	        word.encode('ascii', 'ignore').decode('ascii')
	        for word in _punct_re.split(text.lower())
	    ]
	    return text_type(delim.join(word for word in words if word))

	# Search-results listing; %i is replaced with the page number.
	LIST_URL = "http://challenge.gov/search?page=%i&type=challenges"

	# Fetch listing pages 1 through 23 and run webkit2png on every link found
	# under ".info h2 a", naming the output after the slugified link text.
	for i in range(1, 24):

	    res = urllib2.urlopen(LIST_URL % i)
	    try:
	        root = parse(res).getroot()
	    finally:
	        # Close the response even when the page fails to parse.
	        res.close()

	    for a in root.cssselect(".info h2 a"):
	        href = a.get("href")
	        if href is None:
	            # An anchor without an href has nothing to capture, and a None
	            # argument would make subprocess.call() raise.
	            continue
	        filename = "%s.jpg" % slugify(a.text_content())
	        subprocess.call(('webkit2png', '-T', '-o', filename, href))
No results found