Upload a folder to S3 recursively with Ruby, multiple threads and the aws-sdk v3 gem
# frozen_string_literal: true

require 'rubygems'
require 'aws-sdk'

module S3
  # Upload directory recursively to S3
  class DirectoryUpload
    attr_reader :folder_path, :bucket, :include_folder
    attr_accessor :files
    # Initialize the upload class
    #
    # folder_path    - path to the folder that you want to upload
    # bucket         - the bucket you want to upload to
    # region         - AWS region (defaults to the env var AWS_REGION)
    # aws_key        - your key generated by AWS (defaults to the env var AWS_KEY_ID)
    # aws_secret     - the secret generated by AWS (defaults to the env var AWS_SECRET)
    # include_folder - include the root folder in the destination path? (default: true)
    #
    # Examples
    #   => uploader = DirectoryUpload.new("some_route/test_folder",
    #                                     'your_bucket_name')
    #
    def initialize(folder_path, bucket, region: ENV['AWS_REGION'],
                   aws_key: ENV['AWS_KEY_ID'], aws_secret: ENV['AWS_SECRET'],
                   include_folder: true)
      if aws_key && !aws_key.to_s.empty?
        Aws.config.update(
          region: region,
          credentials: Aws::Credentials.new(aws_key, aws_secret)
        )
      end
      @folder_path = folder_path
      @files = Dir.glob("#{folder_path}/**/*")
      @bucket = bucket
      @include_folder = include_folder
    end
    # Public: Upload files from the folder to S3
    #
    # thread_count - how many threads you want to use (defaults to 5)
    # simulate     - don't perform the upload, just simulate it (default: false)
    # verbose      - print verbose info (default: false)
    #
    # Examples
    #   => uploader.upload!(20)
    #   true
    #   => uploader.upload!
    #   true
    #
    # Returns true when the process has finished
    def upload!(thread_count = 5, simulate = false, verbose = false)
      puts "Total files: #{files.size}... uploading (folder #{folder_path} " +
           "#{include_folder ? '' : 'not '}included)"
      threads = generate_threads(thread_count, simulate, verbose)
      # generate_threads returns a Hash of index => Thread, so join the values
      threads.each_value(&:join)
      true
    end
    # rubocop:disable Metrics/MethodLength
    def generate_threads(count, simulate = false, verbose = false)
      mutex = Mutex.new
      file_number = 0
      count.times.with_object({}) do |i, threads|
        threads[i] = Thread.new do
          loop do
            file = nil
            # Pop the next file inside the mutex so threads don't race on the shared array
            mutex.synchronize do
              file = files.pop
              if file
                file_number += 1
                Thread.current["file_number"] = file_number
              end
            end
            break unless file
            upload_file(file, simulate, verbose)
          end
        end
      end
    end
    # rubocop:enable Metrics/MethodLength
    def upload_file(file, simulate = false, verbose = false)
      # Define the destination path (strip the root folder unless include_folder is set)
      path = include_folder ? file : file.sub(/^#{folder_path}\//, '')
      if verbose
        puts "[#{Thread.current["file_number"]}/#{files.size}] uploading..."
      end
      # Check the local file (not the destination key) and skip directories
      return if File.directory?(file) || simulate
      obj = connection.bucket(@bucket).object(path)
      obj.put(body: File.read(file))
    end
    def connection
      @connection ||= Aws::S3::Resource.new
    end
  end
end

# Sample usage:
#   uploader = S3::DirectoryUpload.new('test', 'miles-media-library')
#   uploader.upload!
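A slightly fuller usage sketch, assuming the class above has been loaded; the folder path, bucket name and region below are placeholders, and the credentials fall back to the AWS_KEY_ID / AWS_SECRET / AWS_REGION env vars when not passed explicitly:

uploader = S3::DirectoryUpload.new(
  'public/assets',                  # hypothetical folder to upload
  'my-example-bucket',              # hypothetical bucket name
  region: 'eu-west-1',              # hypothetical region
  include_folder: false             # upload the contents without the 'public/assets' prefix
)
uploader.upload!(10, false, true)   # 10 threads, no simulation, verbose output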
I had to change the order of the params on line 67, in the #with_object call:
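For reference, Enumerator#with_object yields the element first and the memo object second, so the index has to come before the accumulating hash. A minimal standalone sketch of that argument order (not the commenter's exact code):

threads = 3.times.with_object({}) do |i, acc|
  acc[i] = Thread.new { sleep 0.1 }   # element (i) first, memo hash (acc) second
end
threads.each_value(&:join)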