Skip to content

Instantly share code, notes, and snippets.

@mikechau
Created April 15, 2016 02:18
Show Gist options
  • Save mikechau/277b28c12dbd77678aa48ad29b484525 to your computer and use it in GitHub Desktop.
Save mikechau/277b28c12dbd77678aa48ad29b484525 to your computer and use it in GitHub Desktop.
require 'aws-sdk'
require 'thread'
require 'pathname'
require 'logger'
# Uploads every file matched by a glob pattern to an S3 bucket, using a small
# pool of worker threads that drain a shared queue of file paths.
#
# The S3 object key of each file is its path relative to +relative_path_from+.
#
# @example
#   S3BulkUploader.upload!(
#     path: 'public/assets/**/*',
#     relative_path_from: 'public',
#     bucket: 'my-bucket',
#     acl: 'public-read',
#     excluded_extensions: ['.map']
#   )
class S3BulkUploader
  # NOTE: +files+ is defined explicitly below, and +logger+ / +credentials+
  # are private readers, so none of them are listed here.
  attr_reader :thread_count,
              :path,
              :relative_path_from,
              :region,
              :bucket,
              :acl,
              :dry_run,
              :excluded_extensions

  # Builds an uploader from the given keyword options and runs it.
  #
  # @param args [Hash] the same keyword arguments accepted by {#initialize}
  # @return whatever {#upload!} returns
  def self.upload!(**args)
    # The options must be splatted: #initialize takes keywords only, and
    # Ruby 3+ no longer converts a positional Hash into keyword arguments.
    new(**args).upload!
  end

  # @param thread_count [Integer] number of worker threads to start
  # @param path [String] glob pattern selecting the files to upload
  # @param relative_path_from [String, Pathname] base directory; object keys
  #   are the file paths relative to it
  # @param region [String, nil] AWS region handed to the S3 client
  # @param bucket [String] name of the destination bucket
  # @param acl [String] ACL passed to every upload
  # @param credentials [Aws::Credentials] credentials handed to the S3 client
  # @param logger [Logger, nil] progress logger; a falsy value disables logging
  # @param dry_run [Boolean] when true, nothing is sent to S3
  # @param excluded_extensions [Array<String>, String] extensions to skip,
  #   compared against File.extname (so they include the leading dot)
  def initialize(
    thread_count: 5,
    path:,
    relative_path_from:,
    region: ENV['AWS_REGION'],
    bucket:,
    acl:,
    credentials: Aws::Credentials.new(
      ENV['AWS_ACCESS_KEY_ID'],
      ENV['AWS_SECRET_ACCESS_KEY']
    ),
    logger: Logger.new(STDOUT).tap { |log| log.progname = 'S3_BULK_UPLOADER' },
    dry_run: false,
    excluded_extensions: []
  )
    @thread_count = thread_count
    @path = path
    @relative_path_from = Pathname.new(relative_path_from)
    @region = region
    @bucket = bucket
    @acl = acl
    @credentials = credentials
    @logger = logger
    @dry_run = dry_run
    @excluded_extensions = Array(excluded_extensions)
  end

  # Absolute paths of the files to upload: everything matching +path+ except
  # directories and files whose extension is excluded. Memoized.
  #
  # @return [Array<String>]
  def files
    @files ||= Dir.glob(File.expand_path(path)).reject do |candidate|
      File.directory?(candidate) ||
        excluded_extensions.include?(File.extname(candidate))
    end
  end

  # Queues every file and lets +thread_count+ worker threads upload them.
  # Blocks until all workers have finished; an exception raised inside a
  # worker is re-raised here when that worker is joined.
  #
  # @return the result of the final log call
  def upload!
    log(msg: "Starting bulk upload [#{run_type}] to S3... (#{total_file_count} files)")

    queue = Queue.new
    files.each { |f| queue << f }

    workers = thread_count.times.map do |i|
      Thread.new { drain(queue, worker_index: i) }
    end
    workers.each(&:join)

    log(msg: "Uploaded #{total_file_count} files [#{run_type}] to S3!")
  end

  private

  attr_reader :credentials, :logger

  # Worker loop: uploads queued files until the queue is empty.
  def drain(queue, worker_index:)
    while (file_path = next_file(queue))
      s3_upload = send_to_s3(file_path: file_path)
      log(msg: "Thread [#{worker_index}]: uploaded #{s3_upload[:key]}")
    end
  end

  # Non-blocking pop. Returns nil once the queue is empty; only the
  # ThreadError raised for an empty queue is rescued, nothing else.
  def next_file(queue)
    queue.pop(true)
  rescue ThreadError
    nil
  end

  # Uploads a single file (or only computes its key on a dry run).
  #
  # @param file_path [String] path of the local file
  # @return [Hash] +:key+ (String object key) and +:response+ (the
  #   +upload_file+ result, or an empty Hash on a dry run)
  def send_to_s3(file_path:)
    # Expand both sides so a relative base directory can be compared with
    # the absolute paths produced by #files, and hand S3 a String key
    # rather than a Pathname.
    base = relative_path_from.expand_path
    key = Pathname.new(file_path).expand_path.relative_path_from(base).to_s

    response =
      if dry_run
        {}
      else
        s3_resource.bucket(bucket).object(key).upload_file(file_path, acl: acl)
      end

    { key: key, response: response }
  end

  # Sends +msg+ to the logger at the given severity; no-op without a logger.
  def log(msg:, status: :info)
    return unless logger

    logger.public_send(status, msg)
  end

  def s3_resource
    @resource ||= Aws::S3::Resource.new(client: client)
  end

  def client
    @client ||= Aws::S3::Client.new(
      region: region,
      credentials: credentials
    )
  end

  def total_file_count
    files.size
  end

  def run_type
    dry_run ? 'DRY_RUN' : 'LIVE_RUN'
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment