Created
June 21, 2013 11:28
-
-
Save Zhomart/5830619 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 2ch image saver written in Ruby.
# Create list.txt and put one thread URL on each line.
# Sample:
#
# $ cat list.txt
# http://2ch.hk/aa/res/55017.html
# http://2ch.hk/aa/res/22059.html
# http://2ch.hk/aa/res/40624.html
# http://2ch.hk/aa/res/51504.html
require 'open-uri' | |
require 'nokogiri' | |
require 'uri' | |
# Thread URLs to process: every line of list.txt with surrounding
# whitespace removed (blank lines stay in the list as empty strings).
urls = File.readlines("list.txt").map(&:strip)
# File extensions tried, in this order, when building a full-size image URL.
exts = %w(jpg gif png)

# URLs recorded by earlier runs, one per line in downloaded.txt.
# Stays empty when that file does not exist yet.
downloaded_urls = []
# File.exist? (not the removed File.exists?) so this runs on Ruby 3.2+.
if File.exist?('downloaded.txt')
  downloaded_urls = File.readlines('downloaded.txt').map(&:strip)
end
# Returns the bytes served at +src+, or nil when the server answers with an
# HTTP error. The block form closes the underlying handle once it is read.
def fetch_image(src)
  URI(src).open(&:read)
rescue OpenURI::HTTPError
  nil
end

# Walks the thread URLs (last entry first) and saves the full-size image behind
# every thumbnail found on each page. The surrounding begin/rescue/ensure lets
# CTRL-C stop the run while still writing the attempted URLs to downloaded.txt.
begin
  urls.reverse_each do |base_url|
    next if base_url.empty?

    p "Downloading images from: #{base_url}"

    # Directory name: first run of 5+ word characters, else 2+ digits, else any
    # word run. The scheme is dropped first so "https" is never picked.
    location = base_url.sub(%r{\A\w+://}, '')
    dir = location[/[\w\d]{5,}/] || location[/\d{2,}/] || location[/[\w\d]+/]
    unless dir
      warn "Skipping #{base_url}: cannot derive a directory name"
      next
    end
    Dir.mkdir(dir) unless File.directory?(dir)

    begin
      uri = URI(base_url)
      html = uri.open { |io| Nokogiri::HTML.parse(io) }
    rescue StandardError => e
      # An unreachable or malformed thread URL must not abort the other threads.
      warn "Skipping #{base_url}: #{e.message}"
      next
    end

    # The board is the first path segment of the thread URL ("aa" in
    # http://2ch.hk/aa/res/55017.html); fall back to "aa" when there is none.
    board = uri.path.to_s[%r{\A/([^/]+)/}, 1] || 'aa'
    images = html.css('img')
    count = images.size

    images.each_with_index do |img, index|
      # Thumbnail sources look like ".../thumb/<digits>....<ext>"; the digits
      # are the image id. Anything else is not a thread image.
      id = img['src'].to_s[%r{.*thumb/(\d+).*\.\w+}, 1]
      next unless id

      # Already saved under one of the known extensions.
      next if exts.any? { |ext| File.exist?(File.join(dir, "#{id}.#{ext}")) }

      exts.each do |ext|
        src = "#{uri.scheme}://#{uri.host}/#{board}/src/#{id}.#{ext}"
        percent = index * 100 / count
        print format("(%03d/%02d%%) -> (%s) %s\n", index + 1, percent, dir, src)
        next if downloaded_urls.include?(src)

        data = fetch_image(src)
        # Record the URL only once the attempt has finished, so an interrupted
        # download is retried on the next run.
        downloaded_urls << src
        next if data.nil? || data.empty?

        # Write only after a successful fetch; no empty placeholder files.
        File.binwrite(File.join(dir, "#{id}.#{ext}"), data)
        break
      end
    end
    print "(%03d/100%%) -> Done!\n" % count
  end
  print "\nAll images from sources downloaded! Check the dirs please.\n"
rescue SystemExit, Interrupt
  print "Stopped!\n"
ensure
  # Always persist the attempted URLs, including after CTRL-C or an error.
  File.write('downloaded.txt', downloaded_urls.join("\n"))
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment