Last active
January 15, 2020 07:54
-
-
Save marvindpunongbayan/feb45f2b03a00b4df893d1e3f413c41e to your computer and use it in GitHub Desktop.
AWS Move & Rename Folder and Files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Uses aws-sdk and aws-sdk-s3 v1.
# Moves a folder, together with the files inside it, to another directory.
# Renames a file or a folder.
# Thanks to: https://gist.github.com/peterwake/49a829d2062ad3c5962435341692a680
# Usage:
# s3 = Aws::S3::Resource.new(credentials)
# s3_move = S3::Move.new(s3, ENV['S3_BUCKET_NAME'])
# s3_move.move_folder(old_key: object_path, new_key: new_object_path, mode: "rename")
# Moves or renames S3 objects ("files") and key prefixes ("folders") inside a
# single bucket. Every affected object is relocated with
# Aws::S3::Object#move_to, using a small pool of worker threads.
class S3::Move
  # Objects larger than this many bytes are moved with multipart copy.
  FIVE_MB = 5 * 1024 * 1024
  # Upper bound on the number of worker threads performing moves.
  MAX_THREADS = 25

  # @param s3_connection [#bucket] S3 resource; +bucket(name)+ must return the
  #   bucket object (the usage notes for this class pass an Aws::S3::Resource)
  # @param bucket_name [String] bucket that holds both the old and new keys
  def initialize(s3_connection, bucket_name)
    @s3_connection = s3_connection
    @bucket_name = bucket_name
  end

  # Moves or renames a folder (key prefix) or a single file.
  #
  # @param args [Hash]
  # @option args [String] :old_key current key; one leading "/" is stripped
  # @option args [String] :new_key target key; one leading "/" is stripped
  #   only when +:mode+ is "rename"
  # @option args [String] :mode "move" (default) or "rename"
  # @option args [String] :kind "folder" (default) or "file"
  # @raise [ArgumentError] if either key is missing or blank
  # @return [Object] whatever the final Rails.logger.info call returns
  def move_folder(args = {})
    old_key = args[:old_key]
    new_key = args[:new_key]
    mode = args[:mode] || "move" # rename or move
    kind = args[:kind] || "folder" # folder or file

    raise ArgumentError, ":old_key is required" if old_key.nil?
    raise ArgumentError, ":new_key is required" if new_key.nil?

    # strip preceding '/'
    sanitized_old_key = old_key.sub(/\A\//, "")
    sanitized_new_key = mode == "rename" ? new_key.sub(/\A\//, "") : new_key

    # A blank source key would turn the listing prefix into "" (file mode) or
    # "/" (folder mode), neither of which names a real file or folder.
    raise ArgumentError, ":old_key must not be blank" if sanitized_old_key.empty?
    raise ArgumentError, ":new_key must not be blank" if sanitized_new_key.empty?

    bucket = @s3_connection.bucket(@bucket_name)

    started_at = monotonic_now
    Rails.logger.info "#{kind.titleize} (#{mode.titleize}) - #{sanitized_old_key.inspect} to #{sanitized_new_key.inspect}"

    moves = collect_moves(bucket.client, sanitized_old_key, sanitized_new_key, kind)
    Rails.logger.info "Identified #{moves.count} AWS objects. Total elapsed = #{elapsed_ms_since(started_at)}ms"

    # move files and subfolders before their parent folder
    moves.sort_by! { |move| -move[0].length }
    perform_moves(bucket, moves)

    Rails.logger.info "#{mode.titleize}d files. Total elapsed = #{elapsed_ms_since(started_at)}ms"
  end

  private

  # Lists the bucket and builds the [from_key, to_key, multipart_copy] triples
  # describing every object that has to be moved.
  def collect_moves(s3_client, old_key, new_key, kind)
    folder = kind == "folder"
    moves = []

    # NOTE(review): this queues the bare folder key (no trailing "/") as an
    # object of its own; confirm such an object exists in the target buckets,
    # otherwise moving it will fail.
    moves << [old_key, new_key, false] if folder

    page = s3_client.list_objects({
      prefix: folder ? "#{old_key}/" : old_key,
      delimiter: folder ? "" : "/",
      bucket: @bucket_name,
      encoding_type: 'url'
    })

    loop do
      page.contents.each do |object|
        # Keys come back URL-encoded because of encoding_type: 'url'.
        from_key = CGI.unescape(object.key)

        # In file mode the key is only used as a listing prefix, so siblings
        # such as "report.txt.bak" are returned too; move the exact key only.
        next unless folder || from_key == old_key

        # Block form inserts new_key literally; the two-argument form would
        # interpret backslash sequences such as "\\1" in the replacement.
        to_key = from_key.sub(old_key) { new_key }
        moves << [from_key, to_key, object.size > FIVE_MB]
      end

      break unless page.next_page?
      page = page.next_page
    end

    moves
  end

  # Drains the list of moves using at most MAX_THREADS worker threads and
  # waits for all of them; an error raised in a worker resurfaces from join.
  def perform_moves(bucket, moves)
    queue = moves.inject(Queue.new, :push)
    worker_count = [MAX_THREADS, moves.length].min

    workers = Array.new(worker_count) do
      Thread.new do
        while (move = next_move(queue))
          from_key, to_key, multipart_copy = move
          bucket.object(from_key).move_to(bucket: @bucket_name, key: to_key, multipart_copy: multipart_copy)
        end
      end
    end

    workers.each(&:join)
  end

  # Returns the next queued move, or nil once the queue is empty.
  def next_move(queue)
    queue.pop(true)
  rescue ThreadError
    # A non-blocking pop raises ThreadError when the queue is empty. The rescue
    # is limited to the pop so errors raised while moving are not swallowed.
    nil
  end

  # Current reading of the monotonic clock, in seconds.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Whole milliseconds elapsed since +started_at+ (a monotonic_now reading).
  def elapsed_ms_since(started_at)
    ((monotonic_now - started_at) * 1000).to_i
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment