gabrielfalcao · August 23, 2010 18:38
diff --git a/image-downloader.py b/image-downloader.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # <image downloader - bulk-download all images>
 # Copyright (C) <2010>  Gabriel Falcão <[email protected]>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

 import os
 import re
 import sys
 import couleur
 import warnings
 warnings.simplefilter("ignore")

 import unicodedata
 from urlparse import urlsplit
 from bolacha import Bolacha
 from lxml import html

 args = sys.argv[:]
 if len(args) != 2:
    print "usage %s URL" % __file__
    sys.exit(1)

 def normalize(string):
    """Turn arbitrary text into a lowercase, ASCII-only, underscore-joined slug.

    Accented characters are decomposed (NFKD) and reduced to their ASCII
    base letter before the text is split into words, so an e-acute ends up
    as a plain "e" instead of being dropped.  Every run of non-word
    characters collapses into a single underscore.

    :param string: text, or UTF-8 encoded bytes, to convert.
    :returns: the slug as an ASCII byte string (a plain ``str`` on Python 2).
    """
    if isinstance(string, bytes):
        # On Python 2 command-line arguments and URL parts are byte strings.
        # Undecodable bytes are dropped: they could not survive the ASCII
        # step below anyway.
        string = string.decode('utf-8', 'ignore')
    # Strip accents first: the ASCII-only word split below would otherwise
    # discard accented letters before they could be transliterated.
    ascii_only = unicodedata.normalize('NFKD', string).encode('ascii', 'ignore')
    return b"_".join(re.findall(br"\w+", ascii_only)).lower()

 # colors
 # NOTE(review): presumably this makes couleur translate the "#{...}" markers
 # written to stdout further down into terminal colours -- confirm.
 stdout_colors = couleur.proxy(sys.stdout)
 stdout_colors.enable()

 # argument: discard the script name so the page URL sits at the front
 del args[0]
 url = args[0]

 # http fetcher
 http = Bolacha()

 # fetch the page and parse its body into a DOM
 headers, body = http.get(url)
 dom = html.fromstring(body)

 # collect the src of every <img> that has one and whose src starts with
 # "http" (compared case-insensitively); all other images are skipped
 image_urls = []
 for img in dom.cssselect('img'):
    if 'src' not in img.attrib:
        continue
    src = img.attrib['src']
    if src.lower().startswith('http'):
        image_urls.append(src)

 # target directory name: the URL with every "http://" occurrence removed,
 # passed through normalize()
 host_and_path = url.replace('http://', '')
 dirname = normalize(host_and_path)

 sys.stdout.write('#{bold}#{red}creating the directory "%s"' % dirname)
 try:
    os.makedirs(dirname)
 except OSError, e:
    if e.errno == 17:
        pass

 os.chdir(dirname)
 print "#{green} DONE!#{reset}"
 print "=" * 10

 for link in image_urls:
    sys.stdout.write('#{bold}#{white}downloading image "%s"' % link)
    link_parts = urlsplit(link)
    filename = normalize(link_parts.path.split("/")[-1])
    headers, body = http.get(link)
    sys.stdout.write('#{yellow} and now i am saving at "%s" ...' % filename)
    fd = open(filename, 'w')
    fd.write(body)
    fd.close()
    print "#{green} DONE!#{reset}"
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	# <image downloader - bulk-download all images>
	# Copyright (C) <2010> Gabriel Falcão <[email protected]>
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <http://www.gnu.org/licenses/>.

	import os
	import re
	import sys
	import couleur
	import warnings
	warnings.simplefilter("ignore")

	import unicodedata
	from urlparse import urlsplit
	from bolacha import Bolacha
	from lxml import html

	args = sys.argv[:]
	if len(args) != 2:
	print "usage %s URL" % __file__
	sys.exit(1)

	def normalize(string):
	    """Turn arbitrary text into a lowercase, ASCII-only, underscore-joined slug.

	    Accented characters are decomposed (NFKD) and reduced to their ASCII
	    base letter before the text is split into words, so an e-acute ends up
	    as a plain "e" instead of being dropped.  Every run of non-word
	    characters collapses into a single underscore.

	    :param string: text, or UTF-8 encoded bytes, to convert.
	    :returns: the slug as an ASCII byte string (a plain ``str`` on Python 2).
	    """
	    if isinstance(string, bytes):
	        # On Python 2 command-line arguments and URL parts are byte strings.
	        # Undecodable bytes are dropped: they could not survive the ASCII
	        # step below anyway.
	        string = string.decode('utf-8', 'ignore')
	    # Strip accents first: the ASCII-only word split below would otherwise
	    # discard accented letters before they could be transliterated.
	    ascii_only = unicodedata.normalize('NFKD', string).encode('ascii', 'ignore')
	    return b"_".join(re.findall(br"\w+", ascii_only)).lower()

	# colors
	# NOTE(review): presumably this makes couleur translate the "#{...}" markers
	# written to stdout further down into terminal colours -- confirm.
	stdout_colors = couleur.proxy(sys.stdout)
	stdout_colors.enable()

	# argument: discard the script name so the page URL sits at the front
	del args[0]
	url = args[0]

	# http fetcher
	http = Bolacha()

	# fetch the page and parse its body into a DOM
	headers, body = http.get(url)
	dom = html.fromstring(body)

	# collect the src of every <img> that has one and whose src starts with
	# "http" (compared case-insensitively); all other images are skipped
	image_urls = []
	for img in dom.cssselect('img'):
	    if 'src' not in img.attrib:
	        continue
	    src = img.attrib['src']
	    if src.lower().startswith('http'):
	        image_urls.append(src)

	# target directory name: the URL with every "http://" occurrence removed,
	# passed through normalize()
	host_and_path = url.replace('http://', '')
	dirname = normalize(host_and_path)

	sys.stdout.write('#{bold}#{red}creating the directory "%s"' % dirname)
	try:
	os.makedirs(dirname)
	except OSError, e:
	if e.errno == 17:
	pass

	os.chdir(dirname)
	print "#{green} DONE!#{reset}"
	print "=" * 10

	for link in image_urls:
	sys.stdout.write('#{bold}#{white}downloading image "%s"' % link)
	link_parts = urlsplit(link)
	filename = normalize(link_parts.path.split("/")[-1])
	headers, body = http.get(link)
	sys.stdout.write('#{yellow} and now i am saving at "%s" ...' % filename)
	fd = open(filename, 'w')
	fd.write(body)
	fd.close()
	print "#{green} DONE!#{reset}"