Skip to content

Instantly share code, notes, and snippets.

@rafapolo
Last active October 6, 2015 09:38
Show Gist options
  • Save rafapolo/2973678 to your computer and use it in GitHub Desktop.
Save rafapolo/2973678 to your computer and use it in GitHub Desktop.
Crawleia torrents e capas do MakingOff.org
#encoding: utf-8
#copyfight extrapolo
#20.jun.2012
# Crawls the MakingOff index pages and, for every listed film, creates a local
# directory named "<year> - <title> - <director> - <country>" holding the
# attachments linked from the film's forum topic plus its cover image(s).
#
# Credentials are read from the MAKINGOFF_USER / MAKINGOFF_PASS environment
# variables; the original placeholder values are used when they are unset.
separador = "=" * 40
puts separador
puts "\t MakingOff Copyfight"
puts separador
puts "Carregando..."
require 'rubygems'
require 'fileutils' # FileUtils.mkdir is used below; do not rely on mechanize loading it
require 'mechanize'

browser = Mechanize.new
browser.user_agent_alias = 'Mac Safari'

puts "Autenticando..."
browser.get('http://www.makingoff.org/forum/index.php') do |page|
  form = page.form_with(:id => 'login')
  # Fail with a readable message instead of a NoMethodError on nil.
  abort "Erro: formulário de login não encontrado." unless form
  form.username = ENV.fetch('MAKINGOFF_USER', 'login')
  form.password = ENV.fetch('MAKINGOFF_PASS', 'senha')
  form.click_button
end

# Set to false for a dry run: everything is listed, nothing is written to disk.
pra_valer = true

# Attachment links inside a topic all share this URL prefix.
xpath_anexos = '//a[starts-with(@href, "http://www.makingoff.org/forum/index.php?app=core&module=attach&section=attach&attach_id=")]'

puts "Listando tudo frenéticamente..."
# Index pages to crawl: from .. to
(0..595).each do |pagina|
  puts separador
  puts "# Página #{pagina} #"
  begin
    browser.get("http://indice.makingoff.org/?pg=#{pagina}") do |page|
      page.search('.linha').each do |row|
        puts separador
        links = row.search('a')
        # The row layout is addressed by anchor position (0 = title, 3 = year,
        # 4 = director, 5 = country); skip rows that do not have that shape.
        if links.length < 6
          puts "Erro: linha inesperada, ignorando."
          next
        end

        ano = links[3].text
        nome = links[0].text
        diretor = links[4].text
        pais = links[5].text
        dir = "#{ano} - #{nome} - #{diretor} - #{pais}"
        # Make the name filesystem-friendly: no path separators, parentheses or dots.
        dir = dir.gsub('/', '-').delete('().').gsub(/\s{2,}/, ' ').strip

        criado = false
        begin
          if File.directory?(dir)
            puts "Já salvo: #{dir}"
          else
            href = links[0]['href']
            puts dir
            if pra_valer
              FileUtils.mkdir(dir)
              criado = true
            end

            browser.get(href) do |d|
              d.search(xpath_anexos).each do |anexo|
                # The link text comes from the remote page; keep only its final
                # path component so it cannot escape the target directory.
                nome_arquivo = File.basename(anexo.text.strip)
                url = anexo['href'].to_s
                next if nome_arquivo.empty? || url.empty?

                puts nome_arquivo
                next unless pra_valer

                begin
                  # Fetch the attachment as a raw download rather than a parsed page.
                  browser.pluggable_parser.default = Mechanize::Download
                  browser.get(url).save("#{dir}/#{nome_arquivo}")
                ensure
                  # Always restore the parser, even if the download raised.
                  browser.pluggable_parser.default = Mechanize::File
                end
              end

              # De-duplicate by URL (node objects are never equal to each other)
              # and ignore <img> tags that carry no src attribute.
              capas = d.search(".post>table>tr[4]>td>img").map { |img| img['src'] }.compact.uniq
              capas.each do |src|
                # Extension of the URL path only, ignoring any query string or fragment.
                ext = File.extname(src.sub(/[?#].*\z/, ''))
                puts "capa#{ext}"
                browser.get(src).save("#{dir}/capa#{ext}") if pra_valer
              end
            end
          end
        rescue StandardError => e
          puts "Erro: #{e.class}: #{e.message}"
          # An empty directory left behind would make the next run report this
          # item as already saved, so remove it to allow a retry.
          if criado && File.directory?(dir) && (Dir.entries(dir) - %w[. ..]).empty?
            Dir.rmdir(dir)
          end
        end
      end
    end
  rescue StandardError => e
    # A failing index page should not abort the remaining pages.
    puts "Erro na página #{pagina}: #{e.class}: #{e.message}"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment