gabrielfalcao · July 21, 2010 19:03
diff --git a/4chan-download b/4chan-download
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 # Copyright (C) <2010>  Gabriel Falcão <gabriel@nacaolivre.org>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
 #
 # The above copyright notice and this permission notice shall be included in
 # all copies or substantial portions of the Software.
 #
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.

 import os
 import sys
 import warnings

 # Ignore every warning category from here on; this runs before the
 # third-party imports below.
 warnings.filterwarnings('ignore')

 # Third-party dependencies. Each one is used unconditionally further down,
 # so when an import fails we print the install hint and stop right away
 # instead of carrying on and failing later with a NameError.
 try:
     import bolacha
 except ImportError:
     print("you need bolacha installed in order to use this script: [sudo] pip install bolacha")
     sys.exit(1)

 try:
     import couleur
 except ImportError:
     print("you need couleur installed in order to use this script: [sudo] pip install couleur")
     sys.exit(1)

 try:
     from lxml import html as lhtml
 except ImportError:
     print("you need lxml installed in order to use this script: [sudo] pip install lxml")
     sys.exit(1)

 # The page URL is the last command-line argument. Without any argument
 # sys.argv[-1] is the script's own path, which is not a URL, so stop with a
 # usage line instead of trying to fetch it.
 if len(sys.argv) < 2:
     print("usage: %s <url>" % sys.argv[0])
     sys.exit(1)
 url = sys.argv[-1]

 # Module-level helpers used by the functions below: the HTTP client and the
 # coloured shell writer.
 http = bolacha.Bolacha()
 sh = couleur.Shell(linebreak=True, bold=True)

 sh.cyan("Preparing to download...")

 def dom_of(url):
     """Fetch ``url`` with the module-level ``http`` client and parse it.

     The second item of the ``http.get`` result is handed to
     ``lhtml.fromstring`` and the parsed result is returned.
     """
     response = http.get(url)
     body = response[1]
     return lhtml.fromstring(body)

 def get_main_page(url):
     """Download every linked "jpg" image from the page at ``url``.

     The page is fetched and parsed with ``dom_of``. The ``href`` of each
     ``a[target='_blank']`` element that ends in "jpg" is treated as an
     image URL. Every image is saved in the current directory under the last
     path segment of its URL; names that already exist there as files are
     skipped. Progress is reported through the module-level ``sh`` writer.

     :param url: address of the page to scan for image links.
     """
     sh.green_and_blink_bold_white_and_green("Fetching |all| image urls...", replace=True)
     anchors = dom_of(url).cssselect("a[target='_blank']")
     image_urls = [
         a.attrib['href'] for a in anchors
         if 'href' in a.attrib and a.attrib['href'].endswith("jpg")
     ]
     sh.yellow("Downloading images ...", replace=True)

     total = len(image_urls)
     # Built once as a set so the membership test inside the loop does not
     # rescan a list for every image.
     existing = set(
         name for name in os.listdir(os.curdir) if os.path.isfile(name)
     )

     for index, image_url in enumerate(image_urls, 1):
         filename = image_url.split("/")[-1]
         if filename in existing:
             sh.red_and_blue("ignoring already downloaded image %s (%d of %d) ..." % (filename, index, total), replace=True)
             continue

         sh.red("Downloading image %d of %d [%s] ..." % (index, total, image_url), replace=True)
         img = http.get(image_url)[1]
         sh.green("Saving image %d of %d [%s] ..." % (index, total, image_url), replace=True)

         # Image data is binary, so open with "wb"; ``with`` closes the file
         # even when the write raises.
         with open(filename, "wb") as f:
             f.write(img)
         # Remember the new file so a link listed twice is not fetched again.
         existing.add(filename)
         sh.black("Saved at %s ..." % filename, replace=True)

     sh.green("Done, %d images saved" % total)

 # Script entry point: process the URL taken from the command line above.
 get_main_page(url)
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	# Copyright (C) <2010> Gabriel Falcão <gabriel@nacaolivre.org>
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in
	# all copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	# THE SOFTWARE.

	import os
	import sys
	import warnings

	# Ignore every warning category from here on; this runs before the
	# third-party imports below.
	warnings.filterwarnings('ignore')

	# Third-party dependencies. Each one is used unconditionally further down,
	# so when an import fails we print the install hint and stop right away
	# instead of carrying on and failing later with a NameError.
	try:
	    import bolacha
	except ImportError:
	    print("you need bolacha installed in order to use this script: [sudo] pip install bolacha")
	    sys.exit(1)

	try:
	    import couleur
	except ImportError:
	    print("you need couleur installed in order to use this script: [sudo] pip install couleur")
	    sys.exit(1)

	try:
	    from lxml import html as lhtml
	except ImportError:
	    print("you need lxml installed in order to use this script: [sudo] pip install lxml")
	    sys.exit(1)

	# The page URL is the last command-line argument. Without any argument
	# sys.argv[-1] is the script's own path, which is not a URL, so stop with a
	# usage line instead of trying to fetch it.
	if len(sys.argv) < 2:
	    print("usage: %s <url>" % sys.argv[0])
	    sys.exit(1)
	url = sys.argv[-1]

	# Module-level helpers used by the functions below: the HTTP client and the
	# coloured shell writer.
	http = bolacha.Bolacha()
	sh = couleur.Shell(linebreak=True, bold=True)

	sh.cyan("Preparing to download...")

	def dom_of(url):
	    """Fetch ``url`` with the module-level ``http`` client and parse it.

	    The second item of the ``http.get`` result is handed to
	    ``lhtml.fromstring`` and the parsed result is returned.
	    """
	    return lhtml.fromstring(http.get(url)[1])

	def get_main_page(url):
	    """Download every linked "jpg" image from the page at ``url``.

	    The page is fetched and parsed with ``dom_of``. The ``href`` of each
	    ``a[target='_blank']`` element that ends in "jpg" is treated as an
	    image URL. Every image is saved in the current directory under the last
	    path segment of its URL; names that already exist there as files are
	    skipped. Progress is reported through the module-level ``sh`` writer.

	    :param url: address of the page to scan for image links.
	    """
	    sh.green_and_blink_bold_white_and_green("Fetching |all| image urls...", replace=True)
	    anchors = dom_of(url).cssselect("a[target='_blank']")
	    image_urls = [
	        a.attrib['href'] for a in anchors
	        if 'href' in a.attrib and a.attrib['href'].endswith("jpg")
	    ]
	    sh.yellow("Downloading images ...", replace=True)

	    total = len(image_urls)
	    # Built once as a set so the membership test inside the loop does not
	    # rescan a list for every image.
	    existing = set(
	        name for name in os.listdir(os.curdir) if os.path.isfile(name)
	    )

	    for index, image_url in enumerate(image_urls, 1):
	        filename = image_url.split("/")[-1]
	        if filename in existing:
	            sh.red_and_blue("ignoring already downloaded image %s (%d of %d) ..." % (filename, index, total), replace=True)
	            continue

	        sh.red("Downloading image %d of %d [%s] ..." % (index, total, image_url), replace=True)
	        img = http.get(image_url)[1]
	        sh.green("Saving image %d of %d [%s] ..." % (index, total, image_url), replace=True)

	        # Image data is binary, so open with "wb"; ``with`` closes the file
	        # even when the write raises.
	        with open(filename, "wb") as f:
	            f.write(img)
	        # Remember the new file so a link listed twice is not fetched again.
	        existing.add(filename)
	        sh.black("Saved at %s ..." % filename, replace=True)

	    sh.green("Done, %d images saved" % total)

	# Script entry point: process the URL taken from the command line above.
	get_main_page(url)
No results found