### Getting Files in Parallel from GNIP
require 'parallel'
require 'json'
require 'open-uri'
# Load the GNIP result manifest; it contains "urlList", an array of signed
# download URLs consumed by the parallel fetch loop below.
results = JSON.parse(File.read("/big_data/sandy/vuln_neighborhood_contextual/gnip_results.json"))
results["urlList"].count # NOTE(review): value is discarded — looks like a notebook/REPL remnant
# Job uuid embedded in every download URL; used to slice the file name out of the URL.
uuid = 'hpr4y8xh4'
# Global path of the append-only run log, written by `log` from every worker process.
$logfile = "gnip_log.txt"
# Append a single line (string + "\n") to the shared log file at $logfile.
# The file is opened in append mode on every call so interleaved writes from
# concurrent worker processes land as whole lines.
def log(string)
  entry = "#{string}\n"
  File.open($logfile, 'a') { |f| f.write(entry) }
end
# Output directory for downloaded .gz files.
directory = "/big_data/sandy/vuln_neighborhood_contextual/gnip"
# Names of files already downloaded, so re-runs can skip them.
# FIX: `directory` was referenced one line BEFORE it was assigned (NameError).
# FIX: use `select`, not `select!` — `select!` returns nil when no entries are
#      rejected, which would leave `completed` nil and break `include?` below.
completed = Dir.entries(directory).select { |x| x.end_with? ".gz" }
# Redirect temp files to a scratch dir with enough space.
d = "/big_data/sandy/tmp/"
# FIX: Dir.mkdir raises Errno::EEXIST on re-runs; only create when absent.
Dir.mkdir(d) unless Dir.exist?(d)
ENV["TMPDIR"] = d
processors = 24
# Split the URL list into `processors` chunks (ceiling-ish division so no URL is dropped).
files = results["urlList"].each_slice(results["urlList"].count/processors + 1).to_a
puts "Splitting #{results["urlList"].count} over #{processors} processors."
# Truncate the log before the parallel run starts.
File.write($logfile,'')
# Fetch each chunk of URLs in its own worker process.
# FIX: block parameter renamed `files` -> `chunk`; it shadowed the outer `files`.
Parallel.each_with_index(files, :in_processes => processors) do |chunk, pidx|
  chunk.each do |url|
    begin
      # Derive the local file name from the URL: the segment from the job uuid
      # through ".gz", with path separators flattened to "_".
      # FIX: use `gsub`, not `gsub!` — `gsub!` returns nil when the slice
      #      contains no "/", which made `name` nil and crashed below.
      name = url[url.index(uuid)..(url.index(".gz?")+2)].gsub("/","_")
      if completed.include? name
        log("ALREADY DONE: " + name)
        next
      else
        File.open(directory + "/" + name, "wb") do |new_file|
          # FIX: Kernel#open on a URL string is deprecated (and removed for
          # URLs in Ruby 3.x); call open-uri's URI.open explicitly.
          URI.open(url, 'rb') do |read_file|
            new_file.write(read_file.read)
          end
        end
        log("SUCCESS: " + name + " " + url)
      end
    rescue => e
      # FIX: `name` is nil here when the URL failed to parse; `"ERROR: " + nil`
      # raised TypeError and masked the real error — use to_s.
      log("ERROR: " + name.to_s + e.to_s + " \n " + e.backtrace.to_s)
      # Deliberate fail-fast: abort this worker on the first error.
      exit(1)
    end
  end
end
"done"