Skip to content

Instantly share code, notes, and snippets.

@therod
Last active January 18, 2017 09:15
Show Gist options
  • Save therod/4f8ee372c065c6b893aa365627f3f653 to your computer and use it in GitHub Desktop.
Save therod/4f8ee372c065c6b893aa365627f3f653 to your computer and use it in GitHub Desktop.
Automatically download the latest Podesta E-Mails from Wikileaks
#!/usr/bin/env ruby
require 'typhoeus'
# Every e-mail is stored under ./mail, so create the directory on first run.
Dir.mkdir('mail') unless File.directory?('mail')
# Returns the highest e-mail id already stored in the `mail` directory.
#
# Only entries named `email_<digits>.eml` are considered, so `.`, `..` and
# unrelated files cannot influence the result, and the maximum is taken
# explicitly because Dir.entries makes no promise about ordering.
#
# @return [Integer] the highest stored id, or 0 when no e-mail is stored yet
def last_email
  numbers = Dir.entries('mail').map { |name| name[/\Aemail_(\d+)\.eml\z/, 1] }
  # String#to_i reads the zero padding as plain decimal digits, so the padded
  # id converts directly without stripping anything first.
  numbers.compact.map(&:to_i).max || 0
end
# Stores one e-mail body as mail/email_<id>.eml, with the id zero-padded to
# ten digits, and announces the write on stdout.
#
# @param id   the e-mail id; rendered through the `%010d` format
# @param body the content written to the file
# @return [Integer] the number of bytes written
def write_email(id, body)
  padded_id = format('%010d', id)
  puts "Writing E-Mail: #{padded_id}"
  File.write("mail/email_#{padded_id}.eml", body)
end
# Fetches the e-mails with the given ids through one Typhoeus::Hydra run and
# stores them with write_email.
#
# Responses are collected first and only written once the whole batch has
# finished, in the order of +ids+. Exiting from inside a completion callback
# would end the process while other responses of the same batch were still
# pending, dropping e-mails that do exist; processing afterwards makes the
# outcome independent of the order in which responses arrive.
#
# The process exits with status 0 at the first id (in +ids+ order) whose
# response is the server's error page; ids before it are stored, ids after it
# are not.
#
# @param ids [Array<Integer>] e-mail ids to request
# @return whatever Typhoeus::Hydra#run returned, when no id was missing
def get_batch(ids)
  hydra = Typhoeus::Hydra.new
  bodies = {}
  ids.each do |id|
    request = Typhoeus::Request.new(
      "https://wikileaks.org/podesta-emails/get/#{id}", followlocation: true
    )
    request.on_complete { |response| bodies[id] = response.body }
    hydra.queue(request)
  end
  result = hydra.run

  ids.each do |id|
    body = bodies[id]
    # A missing entry means no response body was recorded for this id; treat
    # it like the error page rather than storing nothing as an e-mail.
    if body.nil? || body.include?('<h1>Internal Server Error</h1>')
      puts "Could not find E-Mail with the id: #{id}, stopping"
      exit 0
    end
    write_email(id, body)
  end
  result
end
# Number of ids requested per get_batch call.
BATCH_SIZE = 10

# Resume at the newest e-mail already on disk (it is requested again), or at
# id 1 when nothing has been stored yet.
count = last_email
count = 1 if count.zero?
puts "Last e-mail was: #{count}"

# get_batch exits the process once an id cannot be found; that is the only way
# out of this loop.
loop do
  # Exclusive range: exactly BATCH_SIZE ids, so consecutive batches do not
  # overlap and no id is downloaded twice.
  get_batch((count...count + BATCH_SIZE).to_a)
  count += BATCH_SIZE
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment