Created
February 22, 2011 15:41
-
-
Save mrdaemon/838851 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[0:527] callisto:one_off_hacks $ python ScrapeThemes_irssi.py
Shitty image scraper v1
(c) Alexandre Gauthier 2010-2011
--------------------------------------
Downloading all the themes from http://irssi.org/themes
Terribly hardcoded output dir is out
NOTE: Created directory 'out'
/themefiles/h3rbz.png matches pattern, fetching... OK
/themefiles/h3rbz.theme matches pattern, fetching... OK
/themefiles/spring.png matches pattern, fetching... OK
/themefiles/spring.theme matches pattern, fetching... OK
/themefiles/dark_winter.png matches pattern, fetching... OK
/themefiles/dark_winter.theme matches pattern, fetching... OK
/themefiles/elite.png matches pattern, fetching... OK
/themefiles/elite.theme matches pattern, fetching... OK
/themefiles/revolutionary.png matches pattern, fetching... OK
/themefiles/revolutionary.theme matches pattern, fetching... OK
/themefiles/revolutionaryv2.png matches pattern, fetching... OK
[...]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# Shittiest script to scrape http://irssi.org/themes and fetch the theme | |
# image screenshots, because frankly, that shit is unviewable. | |
from os import path, mkdir | |
import re | |
import sys | |
import urllib2 | |
from urlparse import urlsplit, urljoin | |
from BeautifulSoup import BeautifulSoup | |
# Page to scrape: the irssi theme gallery index.
LOCATION = "http://irssi.org/themes"
def parse_url(pageurl): | |
""" Parse url, returns soup." """ | |
try: | |
page_handle = urllib2.urlopen(pageurl) | |
except urllib2.URLError as e: | |
print "Error: Failed to connect to %s: %s" % (pageurl, e.reason) | |
sys.exit(1) | |
else: | |
return BeautifulSoup(page_handle) | |
def download(url): | |
""" Download a file from url to destination """ | |
try: | |
data = urllib2.urlopen(url).read() | |
except urllib2.URLError as e: | |
print "Failed to download %s: %s" % (url, e.reason) | |
return False | |
else: | |
filename = path.basename(urlsplit(url)[2]) | |
try: | |
out = open(path.join("out", filename), 'wb') | |
out.write(data) | |
out.close() | |
except IOError as e: | |
print "Error occured saving file: %s" % (e) | |
return False | |
else: | |
return True | |
def main(): | |
print "Shitty image scraper v1" | |
print "(c) Alexandre Gauthier 2010-2011" | |
print "--------------------------------------" | |
print "Downloading all the themes from %s" % (LOCATION) | |
print "Terribly hardcoded output dir is %s" % "out" | |
soup = parse_url(LOCATION) | |
p = re.compile('^\/themefiles\/.+\.\w+', re.IGNORECASE) | |
if path.isdir('out'): | |
pass | |
elif path.isfile('out'): | |
print "A file named 'out' already exists here. Bailing." | |
sys.exit(1) | |
else: | |
# Bah, let it throw and exception, that's enough try/catch | |
# bullshit for today, I think. | |
mkdir('out') | |
print "NOTE: Created directory 'out'" | |
for link in soup.findAll('a'): | |
if p.match(link['href']): | |
print "%s matches pattern, fetching... " % link['href'], | |
if download(urljoin(LOCATION,link['href'])): | |
print "OK" | |
else: | |
print "FAILED!" | |
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment