Skip to content

Instantly share code, notes, and snippets.

@dulltz
Last active August 29, 2015 14:22
Show Gist options
  • Select an option

  • Save dulltz/dafd1de754e6b4e978e3 to your computer and use it in GitHub Desktop.

Select an option

Save dulltz/dafd1de754e6b4e978e3 to your computer and use it in GitHub Desktop.
booru crawler
require 'faraday'
require 'json'
require 'open-uri'
require 'fileutils'
# Fetches post metadata from a booru site's `/post.json` endpoint.
#
# In `:all` mode, result pages are requested one after another, starting at
# page 1, until a page comes back empty or `max_pages` pages have been read.
# Any other mode issues a single request without a page parameter.
#
# @param tags [Array<String>] search tags; joined with spaces into the `tags` query parameter
# @param site_url [String] base URL of the site
# @param mode [Symbol] `:all` to walk result pages, anything else for one request
# @param max_pages [Integer] upper bound on the number of pages requested in `:all` mode
# @return [Object] the parsed JSON body (single request) or an Array of all collected posts (`:all`)
# @raise [JSON::ParserError] if a response body is not valid JSON
def get_posts(tags = ['genga', 'yoh_yoshinari'], site_url = 'http://sakuga.yshi.org', mode = :normal, max_pages = 10)
  conn = Faraday::Connection.new(url: site_url) do |builder|
    builder.use Faraday::Request::UrlEncoded
    builder.use Faraday::Response::Logger
    builder.use Faraday::Adapter::NetHttp
  end
  return fetch_post_page(conn, tags) unless mode == :all

  posts = []
  (1..max_pages).each do |page|
    # Pause 1-2 seconds between consecutive requests (not before the first one).
    sleep(rand(2) + 1) if page > 1
    batch = fetch_post_page(conn, tags, page)
    # An empty page means there are no more results; stop requesting.
    break if batch.empty?

    posts.concat(batch)
    puts "page: #{page}"
    puts posts.first['id']
  end
  posts
end

# Requests one `/post.json` result page and returns its parsed JSON body.
# The `page` query parameter is only sent when `page` is given.
def fetch_post_page(conn, tags, page = nil)
  response = conn.get do |request|
    request.url '/post.json'
    request.params[:tags] = tags.join(' ')
    request.params[:page] = page if page
  end
  JSON.parse(response.body)
end
# Downloads the file of every post into the local `data/` directory.
#
# Each file is named "[tag1][tag2]...<id><ext>" (see `booru_file_name`).
# The method pauses 1-2 seconds before each download.
#
# @param posts [Array<Hash>] posts with a 'tags' string (space-separated),
#   an 'id' and a 'file_url'
# @return [Array<Hash>] the `posts` argument
# @raise [URI::InvalidURIError] if a post's 'file_url' cannot be parsed
def get_files(posts)
  path = 'data/'
  FileUtils.mkdir_p(path)
  posts.each do |post|
    sleep(rand(2) + 1)
    source = URI.parse(post['file_url'])
    fname = booru_file_name(post, source)
    puts fname
    # Open the remote side first so a failed download does not leave an empty
    # local file behind. Going through the parsed URI (instead of Kernel#open
    # on a raw string) keeps working on Ruby 3+ and cannot spawn a "|command".
    source.open do |remote|
      File.open(File.join(path, fname), 'wb') { |f| f.write(remote.read) }
    end
  end
end

# Builds the local file name for a post: bracketed tags, the post id, and the
# extension taken from the path component of the file URL. Path separators
# inside tags are replaced with "_" so the name stays a single path segment.
def booru_file_name(post, source_uri)
  tag_part = '[' + post['tags'].split(' ').join('][') + ']'
  ext = File.extname(source_uri.path.to_s)
  "#{tag_part}#{post['id']}#{ext}".gsub(%r{[/\\]}, '_')
end
# Script entry point: runs only when this file is executed directly (not when
# it is required). Looks up the posts tagged "yukiko_horiguchi" with the
# default site and mode, then downloads their files.
if $PROGRAM_NAME == __FILE__
  get_files(get_posts(["yukiko_horiguchi"]))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment