Last active
January 18, 2017 09:15
-
-
Save therod/4f8ee372c065c6b893aa365627f3f653 to your computer and use it in GitHub Desktop.
Automatically download the latest Podesta E-Mails from Wikileaks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
require 'typhoeus'

# Downloaded messages are stored under ./mail; create it on the first run.
Dir.mkdir('mail') unless Dir.exist?('mail')
# Returns the highest e-mail id already saved in the local `mail/` directory.
#
# Only files named `email_<digits>.eml` are considered. The result does not
# depend on the order in which the filesystem lists directory entries, and
# other files (including `.` and `..`) are ignored.
#
# @return [Integer] the largest stored id, or 0 when no e-mail has been saved
#   (or the `mail/` directory does not exist)
def last_email
  Dir.glob('mail/email_*.eml')
     .map { |path| File.basename(path)[/\Aemail_(\d+)\.eml\z/, 1] }
     .compact
     .map(&:to_i) # String#to_i is base 10, so zero padding needs no stripping
     .max || 0
end
# Saves one downloaded e-mail to `mail/email_<id>.eml`.
#
# The id is zero-padded to ten digits in the file name. The `mail/` directory
# must already exist. An existing file with the same name is overwritten.
#
# @param id [Integer] numeric id of the e-mail
# @param body [String] raw message content to store
# @return [Integer] number of bytes written
def write_email(id, body)
  number = format('%010d', id)
  puts "Writing E-Mail: #{number}"
  # Binary write: store the downloaded bytes untouched (no newline or
  # encoding translation).
  File.binwrite("mail/email_#{number}.eml", body)
end
# Downloads a batch of e-mails in parallel and stores each one on disk.
#
# One request per id is queued on a single Typhoeus::Hydra and the whole batch
# is run to completion. Every response that contains a message is written via
# write_email. If the server answered "Internal Server Error" for any id, the
# process exits with status 0 *after* the batch has finished, so responses that
# were still in flight are not thrown away.
#
# @param ids [Array<Integer>] e-mail ids to fetch
# @return [nil] when every id was handled; does not return when an id is
#   missing (the process exits)
def get_batch(ids)
  hydra = Typhoeus::Hydra.new
  missing = []

  ids.each do |id|
    request = Typhoeus::Request.new(
      "https://wikileaks.org/podesta-emails/get/#{id}", followlocation: true
    )
    request.on_complete do |response|
      if response.code.to_i.zero?
        # No HTTP status at all (timeout, DNS/connection failure): there is no
        # message to store, so do not write an empty file.
        warn "Request for E-Mail #{id} failed: #{response.return_message}"
      elsif response.body.include?('<h1>Internal Server Error</h1>')
        # The server's error page is treated as "no such e-mail".
        missing << id
      else
        write_email(id, response.body)
      end
    end
    hydra.queue(request)
  end

  hydra.run
  return if missing.empty?

  puts "Could not find E-Mail with the id: #{missing.min}, stopping"
  exit 0
end
# Entry point: resume from the highest id already on disk (or 1 on a fresh
# run) and keep fetching fixed-size batches. The loop has no exit condition of
# its own; get_batch terminates the process once an id cannot be found.
batch_size = 10

count = last_email
count = 1 if count.zero?
puts "Last e-mail was: #{count}"

loop do
  # Exclusive range: exactly batch_size ids, so consecutive batches do not
  # overlap and no id is downloaded twice.
  get_batch((count...count + batch_size).to_a)
  count += batch_size
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment