Created
December 11, 2018 16:06
-
-
Save peterwake/49a829d2062ad3c5962435341692a680 to your computer and use it in GitHub Desktop.
AWS folder move using threading
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample class that moves every object under one S3 key prefix ("folder") to
# another prefix, running the individual object moves on a pool of threads.
#
# This is a sample class that you'll probably need to do a bit of work on for
# your own needs, but it will hopefully save you a lot of the headaches that
# we had.
#
# NOTE(review): the class calls `params`, `bucket_name` and
# `s3_object(s3_client, key)` and logs through `Rails.logger`; none of these
# are defined here, so the host application has to provide them.
class AwsMove
  # Objects larger than this many bytes are moved with `multipart_copy: true`.
  FIVE_MB = 5 * 1024 * 1024
  # Upper bound on the number of worker threads started per call.
  MAX_THREADS = 25

  # Moves every object whose key starts with +old_key+ so that this prefix is
  # replaced by +new_key+.
  #
  # A single leading '/' is always stripped from +old_key+; it is stripped
  # from +new_key+ only when `params[:mode] == 'rename'`.
  #
  # @param s3_client [#list_objects_v2] client used to list the objects and
  #   handed on to `s3_object` (presumably an Aws::S3::Client - confirm)
  # @param old_key [String] prefix of the objects to move
  # @param new_key [String] prefix the moved objects should get
  # @return [Object] whatever the final `Rails.logger.info` call returns
  # @raise [StandardError] the first error recorded by a worker thread,
  #   re-raised only after every worker has finished
  def move_folder(s3_client, old_key, new_key)
    # strip preceding '/'
    sanitized_old_key = old_key.sub(%r{\A/}, '')
    sanitized_new_key = params[:mode] == 'rename' ? new_key.sub(%r{\A/}, '') : new_key

    started_at = monotonic_now
    Rails.logger.info "Moving"
    moves = collect_moves(s3_client, sanitized_old_key, sanitized_new_key)
    Rails.logger.info "Identified #{moves.count} AWS objects. Total elapsed = #{elapsed_ms_since(started_at)}ms"

    # move files and subfolders before their parent folder
    moves.sort_by! { |from_key, _to_key, _multipart_copy| -from_key.length }
    run_moves(s3_client, moves)

    Rails.logger.info "Moved files. Total elapsed = #{elapsed_ms_since(started_at)}ms"
  end

  private

  # Lists every page of objects under +old_prefix+ and returns an array of
  # `[from_key, to_key, multipart_copy]` triples.
  def collect_moves(s3_client, old_prefix, new_prefix)
    moves = []
    page = s3_client.list_objects_v2(bucket: bucket_name, prefix: old_prefix, encoding_type: 'url')
    loop do
      page.contents.each do |object|
        # keys were requested with encoding_type 'url', so decode them first
        from_key = CGI.unescape(object.key)
        # Block form of #sub: the replacement is inserted literally, so a
        # backslash sequence such as "\0" in the new prefix is not expanded.
        to_key = from_key.sub(old_prefix) { new_prefix }
        moves << [from_key, to_key, object.size > FIVE_MB]
      end
      break unless page.next_page?

      page = page.next_page
    end
    moves
  end

  # Runs the moves on at most MAX_THREADS threads, waits for all of them, and
  # then re-raises the first error a worker recorded (if any).
  def run_moves(s3_client, moves)
    queue = Queue.new
    moves.each { |move| queue << move }
    failures = Queue.new

    workers = Array.new([MAX_THREADS, moves.length].min) do
      Thread.new do
        begin
          while (move = next_move(queue))
            from_key, to_key, multipart_copy = move
            s3_object(s3_client, from_key).move_to(bucket: bucket_name, key: to_key, multipart_copy: multipart_copy)
          end
        rescue StandardError => e
          # a failing worker stops here; the remaining workers keep draining the queue
          failures << e
        end
      end
    end

    workers.each(&:join)
    raise failures.pop unless failures.empty?
  end

  # Non-blocking pop: returns the next queued move, or nil once the queue is
  # empty. Only the pop is rescued, so a ThreadError raised by a move is not
  # mistaken for "queue drained".
  def next_move(queue)
    queue.pop(true)
  rescue ThreadError
    nil
  end

  # Current reading of the monotonic clock, in seconds.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Whole milliseconds elapsed since +started_at+ (a `monotonic_now` reading).
  def elapsed_ms_since(started_at)
    ((monotonic_now - started_at) * 1000).to_i
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A revised version based on this is available at: https://gist.github.com/marvindpunongbayan/feb45f2b03a00b4df893d1e3f413c41e