Skip to content

Instantly share code, notes, and snippets.

@GWuk
Forked from bantic/bucket_sync_service.rb
Last active December 26, 2015 21:16
Show Gist options
  • Save GWuk/9f78f608e5f0411e6020 to your computer and use it in GitHub Desktop.
Save GWuk/9f78f608e5f0411e6020 to your computer and use it in GitHub Desktop.
ruby class to copy from one aws s3 bucket to another - using aws-sdk Version 2
#!/usr/bin/env ruby
require 'logger'
require 'aws-sdk' # gem name is 'aws-sdk', uses Version 2
# Copies objects from one S3 bucket to another (aws-sdk version 2 resources).
#
# Objects below a key prefix are listed in both buckets; every source object
# that has no identical entry in the destination listing is copied. Entries
# are compared by key only, or by key and ETag when +with_etag+ is truthy.
class BucketSyncService
  # Region used when a credentials hash carries no :region entry.
  DEFAULT_REGION = 'eu-west-1'

  attr_reader :from_bucket, :to_bucket, :logger, :path, :etag
  # When truthy, #perform logs at DEBUG level (listings, diff, each object).
  attr_accessor :debug

  # from_credentials and to_credentials are both hashes with these keys:
  # * :aws_access_key_id
  # * :aws_secret_access_key
  # * :bucket
  # * :region (optional, defaults to DEFAULT_REGION)
  # path_prefix: only keys starting with this prefix are listed and compared
  # with_etag: compare based additionally on etag
  def initialize(from_credentials, to_credentials, path_prefix, with_etag)
    @from_bucket = bucket_from_credentials(from_credentials)
    @to_bucket = bucket_from_credentials(to_credentials)
    @path = path_prefix
    @etag = with_etag
  end

  # Runs the sync and writes log lines to +output+ (an IO or anything else
  # Logger.new accepts). Returns whatever the final logger.info call returns.
  # An error raised while copying an object propagates to the caller.
  def perform(output = STDOUT)
    create_logger(output)
    logger.info "Starting sync from #{from_bucket.name}/#{path} to #{to_bucket.name}/#{path}"

    logger.debug "get list from"
    from_objects = list_objects(from_bucket)
    logger.debug "get list to"
    to_objects = list_objects(to_bucket)

    # Array difference on [key, etag] pairs: what remains is missing in the
    # destination or (with etag comparison) present with a different etag.
    missing_objects = from_objects - to_objects
    logger.debug "diff: #{missing_objects.map(&:first)}"
    logger.debug "copy objects"

    synced = 0
    missing_objects.each do |key, _etag|
      sync(from_bucket.object(key))
      synced += 1
    end
    logger.info "Done. Synced #{synced}"
  end

  private

  # Builds the logger for this run; the level follows the current +debug+ flag.
  def create_logger(output)
    @logger = Logger.new(output).tap do |l|
      l.level = debug ? Logger::DEBUG : Logger::INFO
    end
  end

  # Returns [key, etag] pairs for every object below +path+ in +bucket+.
  # The etag slot is nil when etag comparison is disabled, so that only the
  # keys take part in the diff.
  def list_objects(bucket)
    bucket.objects(prefix: path).map { |obj| [obj.key, etag ? obj.etag : nil] }
  end

  # Copies one source object into to_bucket under the same key.
  #
  # The destination bucket's owner is granted read access on the source
  # object for the duration of the copy. The ACL is put back to "full control
  # for the source bucket owner" in an ensure block, so a failed copy does not
  # leave the read grant behind. If that reset itself raises, its error is the
  # one the caller sees.
  def sync(object)
    # Block form: the description (which reads size and timestamp from the
    # object) is only built when debug logging is enabled.
    logger.debug { "Syncing #{describe(object)}" }

    reader_grant = "id=\"#{owner_id(to_bucket)}\""
    owner_grant = "id=\"#{owner_id(object.bucket)}\""

    # let to_bucket read object
    object.acl.put(grant_read: reader_grant, grant_full_control: owner_grant)
    begin
      # pull object (needed in aws-sdk V2)
      to_bucket.object(object.key).copy_from(object)
    ensure
      # remove acl to read granted before
      object.acl.put(grant_full_control: owner_grant)
    end
  end

  # Owner id of +bucket+, looked up once per bucket name and then reused for
  # every object (assumes bucket ownership does not change during a run).
  def owner_id(bucket)
    @owner_ids ||= {}
    @owner_ids[bucket.name] ||= bucket.acl.owner.id
  end

  # One-line description of an object for the debug log:
  # key, size in whole KiB and last-modified timestamp.
  def describe(object)
    content_length_in_kb = object.content_length / 1024
    "#{object.key} #{content_length_in_kb}k #{object.last_modified.strftime('%b %d %Y %H:%M')}"
  end

  # Returns the bucket resource named by credentials[:bucket], using a client
  # built from the given keys and region. Existence of the bucket is not
  # checked here and the bucket is not created.
  def bucket_from_credentials(credentials)
    s3 = Aws::S3::Resource.new(
      access_key_id: credentials[:aws_access_key_id],
      secret_access_key: credentials[:aws_secret_access_key],
      region: credentials[:region] || DEFAULT_REGION
    )
    s3.bucket(credentials[:bucket])
  end
end
# Example invocation. Guarded so that it runs only when this file is executed
# directly; requiring the file from other code just defines the class.
# Fill in the access keys and bucket names before running.
if __FILE__ == $PROGRAM_NAME
  from_creds = { aws_access_key_id: '', aws_secret_access_key: '', bucket: '' }
  to_creds = { aws_access_key_id: '', aws_secret_access_key: '', bucket: '' }
  path = ''
  syncer = BucketSyncService.new(from_creds, to_creds, path, true)
  syncer.debug = true # log each object
  syncer.perform
end
=begin
Install on AWS Debian 7 Wheezy (ami-61e56916):
apt-get install rubygems1.9.1
gem1.9.1 install aws-sdk
=end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment