Created
November 2, 2010 01:54
-
-
Save valda/659165 to your computer and use it in GitHub Desktop.
Danbooru 一括ダウンローダー
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
require 'rubygems'
require 'cgi'
require 'fileutils'
require 'optparse'
require 'uri'
require 'active_support'
require 'mechanize'
# Script version string.
# NOTE(review): presumably picked up by OptionParser's built-in --version switch -- confirm.
Version = "1.0.0"

# Command-line settings. :page defaults to 1 so a concrete start page is
# always available even when -p is not given.
options = { :tag => nil, :dir => 'downloads', :page => 1 }

parser = OptionParser.new do |ps|
  ps.on("-t TAG", "--tag TAG", "fetch tag") { |v| options[:tag] = v }
  ps.on("-d DIR", "--dir DIR", "save dir") { |v| options[:dir] = v }
  # Integer lets OptionParser reject non-numeric values and store a number
  # instead of a raw string.
  ps.on("-p NUM", "--page NUM", Integer, "start page number") { |v| options[:page] = v }
end

# Parse after the parser is fully built, and turn bad switches/arguments into
# a readable message plus usage instead of an uncaught exception.
begin
  parser.parse!(ARGV)
rescue OptionParser::ParseError => e
  warn e.message
  puts parser.help
  exit 1
end

# A tag is mandatory: show usage and stop when it is missing or whitespace-only.
# Plain Ruby is used here so the check does not depend on Active Support's
# core extensions having been loaded.
if options[:tag].to_s.strip.empty?
  puts parser.help
  exit
end
# Downloads the full-size image of every post returned by a Danbooru tag
# search, walking the result pages until no "next" link is found.
class Danbooru
  # XPath selecting the thumbnail anchors on a search-result page.
  THUMBNAIL_XPATH = '//span[@class="thumb" or @class="thumb blacklisted"]/a'

  # Link text identifying the image link on a post page,
  # e.g. "800x600 (123.4 KB)".
  ORIGINAL_LINK_TEXT = /\d+x\d+ \([\.\d]+ KB\)$/i

  # Link text of the pager entry leading to the next result page.
  # NOTE(review): both alternatives are identical as written; one of them may
  # originally have been an HTML-entity spelling -- confirm against the site.
  NEXT_LINK_TEXT = /^(>>|>>)$/

  # Directory that downloaded files are written to.
  attr_reader :download_dir

  # @param opts [Hash] settings; :download_dir selects the target directory
  #   and falls back to the current directory when absent.
  def initialize(opts = {})
    @agent = Mechanize.new do |a|
      a.user_agent_alias = 'Windows IE 7'
      a.max_history = 1
    end
    @base_url = 'http://danbooru.donmai.us/post/index'
    @download_dir = opts[:download_dir] || '.'
  end

  # Fetches every result page for +tag+, starting at +pagenum+, and saves the
  # image linked from each listed post into #download_dir.
  #
  # @param tag [String] tag query; it is URL-escaped before use.
  # @param pagenum [Integer, String, nil] first result page; nil means 1.
  # @return [nil]
  def fetch_by_tags(tag, pagenum = 1)
    # Callers may pass nil explicitly (e.g. an unset command-line option),
    # which would otherwise produce an empty "page=" parameter.
    pagenum ||= 1
    search_url = URI.join(@base_url, "?tags=#{CGI.escape(tag)}&page=#{pagenum}")

    while search_url
      puts "Open: #{search_url}"
      page = @agent.get(search_url)

      page.root.search(THUMBNAIL_XPATH).each do |elem|
        download_post(URI.join(@base_url, elem['href']))
      end

      # Continue with the next result page, or stop when the pager has none.
      next_link = page.links.find { |l| l.text.match(NEXT_LINK_TEXT) }
      search_url = next_link && URI.join(@base_url, next_link.href)
    end
  end

  private

  # Opens one post page and saves the image it links to, unless a file with
  # the same name already exists in the download directory.
  def download_post(post_url)
    puts "Open: #{post_url}"
    post_page = @agent.get(post_url)

    link = post_page.links.find { |l| l.text.match(ORIGINAL_LINK_TEXT) }
    return unless link

    save_path = File.join(@download_dir, File.basename(link.href))
    if File.exist?(save_path)
      puts "File already exist: #{save_path}"
      return
    end

    puts "Download: #{link.href}"
    image_data = @agent.get_file(link.href)
    puts "Save to: #{save_path}"
    # Binary mode: image bytes must be written untouched (text mode may
    # translate newline bytes on some platforms and corrupt the file).
    File.open(save_path, 'wb') { |fh| fh << image_data }
  end
end
# Make sure the target directory exists before anything is saved into it.
FileUtils.mkpath(options[:dir]) unless File.exist?(options[:dir])

# Run the download. When no start page was supplied options[:page] is unset,
# so fall back to the first page instead of handing nil to fetch_by_tags.
g = Danbooru.new(:download_dir => options[:dir])
g.fetch_by_tags(options[:tag], options[:page] || 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment