Skip to content

Instantly share code, notes, and snippets.

@fukayatsu
Created July 16, 2012 18:32
Show Gist options
  • Save fukayatsu/3124191 to your computer and use it in GitHub Desktop.
Save fukayatsu/3124191 to your computer and use it in GitHub Desktop.
画像取得スクリプト
# coding: utf-8
require 'fileutils'
require 'bbs2ch'
require 'yaml'
# Image download script.
#
# For every board matching /犬猫大好き/ and every thread on it matching /猫画像/:
#   1. download (or resume) the thread's dat file under dat/<host>/<board>/,
#   2. parse only the lines that were added since the previous run,
#   3. download every image referenced by those lines into tmp/ together with a
#      "<name>.txt" YAML sidecar holding the image's metadata,
#   4. move plausible images (and their sidecars) into img/ and delete the rest.
#
# External commands are run with the multi-argument form of Kernel#system so
# that URLs taken from remote content are never interpreted by a shell.
#
# NOTE(review): CGI and String#toutf8 (kconv) are not required in this file;
# presumably the bbs2ch gem loads them -- confirm.

tmp_dir = 'tmp'
img_dir = 'img'
# Files smaller than this are treated as failed or placeholder downloads.
min_image_bytes = 5 * 1024

menu = BBS2ch::Menu.new
boards = menu.boards(/犬猫大好き/)
boards.each do |board|
  threads = board.threads(/猫画像/)
  threads.each do |thr|
    dat_name = thr.url.split('/').last
    # Board URL without its scheme and without its last character.
    dat_dir = "dat/#{thr.extra[:board][:url].split('//')[1][0...-1]}"
    dat_path = "#{dat_dir}/#{dat_name}"
    FileUtils.mkdir_p dat_dir

    # Remember how many complete lines were already stored so that only the
    # newly appended ones are processed below.
    before_count = 0
    wget_args = ['wget', '-t', '3', '-N', '-P', dat_dir]
    if File.exist?(dat_path)
      before_count = File.read(dat_path).toutf8.count("\n")
      wget_args << '-c' # continue the existing partial download
    end
    system(*wget_args, thr.url)

    # The download may have failed on the very first attempt; skip the thread
    # instead of crashing on a missing file.
    next unless File.exist?(dat_path)

    dat_text = File.read(dat_path).toutf8
    after_count = dat_text.count("\n")
    # Slicing up to after_count ignores a trailing line that has no newline
    # yet; `|| []` covers a start index that is past the end of the array.
    new_lines = dat_text.split("\n")[before_count...after_count] || []

    FileUtils.mkdir_p tmp_dir
    new_lines.each do |line|
      response = BBS2ch::Response.parse(line, thr.extra)
      response.images.each do |img|
        # The escaped URL doubles as a unique, filesystem-safe file name.
        img_name = CGI.escape img.url
        img_path = "#{tmp_dir}/#{img_name}"
        system('wget', '-t', '1', '--random-wait', '--timeout=5', '-nc', '-O', img_path, img.url)
        File.open("#{img_path}.txt", 'a') do |file|
          file << img.extra.to_yaml
        end
      end
    end

    FileUtils.mkdir_p img_dir
    # Delete downloads that are too small or that start with '<' (most likely
    # an HTML page rather than an image) along with their metadata; move
    # everything else into img_dir.
    Dir.glob("#{tmp_dir}/*.{jpg,png,gif}") do |path|
      meta_path = "#{path}.txt"
      if File.size(path) < min_image_bytes || File.read(path, 1) == '<'
        FileUtils.rm_f [path, meta_path]
      else
        name = File.basename(path)
        File.rename(path, "#{img_dir}/#{name}")
        File.rename(meta_path, "#{img_dir}/#{name}.txt") if File.exist?(meta_path)
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment