-
-
Save edwardsharp/d501af263728eceb361ebba80d7fe324 to your computer and use it in GitHub Desktop.
require 'aws-sdk'
require 'logger'
# Copies objects from one S3 bucket into another.
#
# An object is copied when the destination bucket has no object under the
# same key, or when the destination object's ETag differs from the source's.
# Every copied object is written with the DEFAULT_ACL canned ACL.
#
# Buckets are looked up by name with Aws::S3::Bucket.new, so region and
# credentials come from whatever the AWS SDK is configured with.
class BucketSyncService
  # Canned ACL applied to every object copied into the destination bucket.
  DEFAULT_ACL = "public-read".freeze

  attr_reader :from_bucket, :to_bucket, :logger

  # When truthy, #perform logs one line per object at DEBUG level.
  attr_accessor :debug

  # @param from_bucket [String] name of the bucket to read from
  # @param to_bucket [String] name of the bucket to write to
  # Either bucket is created if it does not exist yet.
  def initialize(from_bucket, to_bucket)
    @from_bucket = bucket_from_credentials(from_bucket)
    @to_bucket = bucket_from_credentials(to_bucket)
  end

  # Walks every object in the source bucket and copies the ones that are
  # missing from, or differ from, the destination bucket.
  #
  # @param output [IO, String] log destination handed to Logger.new
  # @return whatever the final Logger#info call returns
  def perform(output = STDOUT)
    object_counts = { sync: 0, skip: 0 }
    create_logger(output)
    logger.info "Starting sync."
    from_bucket.objects.each do |object|
      if object_needs_syncing?(object)
        sync(object)
        object_counts[:sync] += 1
      else
        # Block form: the description is only built when DEBUG is enabled.
        logger.debug { "Skipped #{describe(object)}" }
        object_counts[:skip] += 1
      end
    end
    logger.info "Done. Synced #{object_counts[:sync]}, " \
                "skipped #{object_counts[:skip]}."
  end

  private

  # Builds the logger for this run; the level follows the #debug flag.
  def create_logger(output)
    @logger = Logger.new(output).tap do |l|
      l.level = debug ? Logger::DEBUG : Logger::INFO
    end
  end

  # Copies a single source object to the same key in the destination bucket.
  def sync(object)
    logger.debug { "Syncing #{describe(object)}" }
    # Hand the SDK a bucket/key hash instead of a pre-built "bucket/key"
    # string so that it constructs (and URL-escapes) the copy source itself;
    # a raw string breaks on keys containing non-ASCII characters.
    source = { bucket: object.bucket_name, key: object.key }
    to_bucket.object(object.key).copy_from(source, acl: DEFAULT_ACL)
  end

  # One-line description of an object for log output: key, size in KiB
  # (integer division) and last-modified timestamp.
  # Deliberately not named `pp`, which would shadow Kernel#pp.
  def describe(object)
    content_length_in_kb = object.content_length / 1024
    "#{object.key} #{content_length_in_kb}k " \
      "#{object.last_modified.strftime('%b %d %Y %H:%M')}"
  end

  # True when the destination lacks the key or holds a different ETag.
  def object_needs_syncing?(object)
    to_object = to_bucket.object(object.key)
    !to_object.exists? || to_object.etag != object.etag
  end

  # Returns a bucket handle for the given name, creating the bucket first
  # when it does not exist.
  def bucket_from_credentials(bckt)
    bucket = Aws::S3::Bucket.new(bckt)
    # Bucket#create's return value is not the bucket, so keep the handle.
    bucket.create unless bucket.exists?
    bucket
  end
end
=begin | |
Example usage: | |
Aws.config.update({ | |
region: 'aws_region', | |
credentials: Aws::Credentials.new('aws_access_key_id', 'aws_secret_access_key'), | |
}) | |
require "bucket_sync_service.rb" | |
syncer = BucketSyncService.new("from-bucket", "to-bucket") | |
syncer.debug = true # log each object | |
syncer.perform | |
=end |
Really helpful! It raises Aws::S3::Errors::NoSuchKey when the key includes non-ASCII characters like é, ü etc. though - have you found any way around that?
@Sprachprofi, hmm, yeah, you might try to `CGI.escape` the `object.key` in the `sync` method, but then you'd need to go back and un-escape everything. I'm not sure if the error you mention is getting raised from the source or the destination key.
perhaps newer versions of the aws-sdk gem support this? do you run into encoding problems for non-ASCII keys in other places? there might not be good support for that and you might need to consider not using them in your bucket keys?? i dunno tho, i don't use AWS much these days.
Thanks @edwardsharp . how to handle if the source bucket and destination bucket has different credentials (access key & secret key)
Thanks @edwardsharp . how to handle if the source bucket and destination bucket has different credentials (access key & secret key)
I guess you could just define two different methods that each wrap a call to `AWS::S3.new`, like how `bucket_from_credentials` does.
Absolutely fantastic work, thank you @edwardsharp ! Confirmed working as of 6/23/17 using version 3.0.0.rc8 of aws-sdk