Created April 2, 2013 20:53
Ruby S3 multi-part uploader with resume capability. Usage: ruby mpu.rb [file to upload] [bucket] [key]. My knowledge of Ruby isn't great, so I offer my apologies for the bad code.
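Note: the script targets the v1 aws-sdk gem (the AWS::S3 API used below); AWS::S3.new should pick up credentials from the SDK's configuration or the standard AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables.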
#!/usr/bin/env ruby
require 'rubygems'
require 'aws-sdk'
require 'digest' # Digest::MD5 is used below to verify part ETags
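
# Read the Nth part (1-indexed) of the file and return its bytes along
# with their MD5, so the ETag S3 returns can be verified locally.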
def read_part(file_name, file_size, part_size, part)
  file_offset = part_size * ( part - 1 )
  bytes_to_read = [ part_size, file_size - file_offset ].min
  # binary mode so the MD5 matches the bytes on disk regardless of platform
  part_contents = File.read( file_name, bytes_to_read, file_offset, :mode => 'rb' )
  part_md5 = Digest::MD5.hexdigest( part_contents )
  return part_contents, part_md5
end
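
# Upload one part and check S3's returned ETag against the local MD5;
# for an unencrypted upload, a part's ETag is normally the hex MD5 of
# its bytes, which is what makes this comparison possible.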
def upload_part(part_contents, part_md5, part, upload)
  result_obj = upload.add_part( part_contents, :part_number => part )
  etag_md5sum = result_obj.etag[ 1..-2 ] # ETags are quoted strings (HTTP entity-tag syntax); strip the quotes
  if etag_md5sum != part_md5
    raise "part_md5 mismatch!"
  end
end
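
# Resume check: return false (skip) only when S3 already holds this part
# with a matching MD5; any error while inspecting the remote part
# (e.g. it does not exist yet) means the part still needs uploading.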
def should_upload_part(part, part_md5, upload)
  begin
    if upload.parts[part].etag[1..-2] == part_md5
      return false
    end
  rescue
    return true
  end
  return true
end
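
# Try a part upload up to `tries` times, raising once the attempts are
# exhausted; the last exception's message is included in the error.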
def upload_part_with_retry(part_contents, part_md5, part, upload, tries)
  the_exception = nil
  tries.times do
    begin
      upload_part(part_contents, part_md5, part, upload)
      return
    rescue => e
      the_exception = e
    end
  end
  raise "too many retries, error: " + the_exception.message
end
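
# Upload every part S3 does not already have, printing a rough time
# estimate based on the parts uploaded so far, then complete the upload
# from the parts S3 has on record (:remote_parts).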
def upload_parts(file_name, file_size, part_size, number_of_parts, upload)
  skipped_parts = 0
  start_time = Time.now
  (1..number_of_parts).each do |part|
    part_contents, part_md5 = read_part( file_name, file_size, part_size, part )
    if should_upload_part( part, part_md5, upload )
      upload_part_with_retry( part_contents, part_md5, part, upload, 3 )
      elapsed_time = Time.now - start_time
      average_part_time = elapsed_time / (part - skipped_parts)
      estimated_time_remaining = (average_part_time * (number_of_parts - part) / 60.0).ceil
      puts "Uploaded #{part}/#{number_of_parts} parts. Estimated time remaining: #{estimated_time_remaining} minutes"
    else
      skipped_parts += 1
      puts "Skipping already uploaded part #{part}/#{number_of_parts}."
    end
  end
  upload.complete(:remote_parts)
end
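
# --- main ---
# bail out early when the arguments are missing
abort "usage: ruby mpu.rb [file to upload] [bucket] [key]" unless ARGV.length == 3
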
file_name = ARGV[0]
bucket_name = ARGV[1]
key_name = ARGV[2]

s3 = AWS::S3.new
bucket = s3.buckets[bucket_name]
object = bucket.objects[key_name]
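
# Resume support: if this key already has exactly one multipart upload
# in progress, reuse it so parts uploaded earlier can be skipped;
# otherwise start a fresh one.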
number_of_multipart_uploads = object.multipart_uploads.count

upload = nil
if number_of_multipart_uploads > 1
  raise "multiple uploads in progress" # haven't decided what to do here yet
elsif number_of_multipart_uploads == 1
  upload = object.multipart_uploads.first
else
  upload = object.multipart_upload
end
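
# S3 caps multipart uploads at 10,000 parts and requires every part
# except the last to be at least 5 MiB, so pick the smallest part size
# that satisfies both limits.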
file_size = File.size(file_name)
part_size = [ (file_size / 10000.0).ceil, 5*1024*1024 ].max
number_of_parts = (file_size / part_size.to_f).ceil

puts "Uploading #{file_size} byte file as #{number_of_parts} chunks each up to #{part_size} bytes."
upload_parts(file_name, file_size, part_size, number_of_parts, upload)
puts "Done!"