-
-
Save GWuk/9f78f608e5f0411e6020 to your computer and use it in GitHub Desktop.
ruby class to copy from one aws s3 bucket to another - using aws-sdk Version 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'logger'

require 'aws-sdk' # gem name is 'aws-sdk', uses Version 2
# Copies objects under a key prefix from one S3 bucket to another, skipping
# objects whose key (and, optionally, etag) is already listed in the
# destination bucket. Written against aws-sdk Version 2.
class BucketSyncService
  # Region used when a credentials hash does not carry a :region key.
  DEFAULT_REGION = 'eu-west-1'.freeze

  attr_reader :from_bucket, :to_bucket, :logger, :path, :etag
  attr_accessor :debug

  # from_credentials and to_credentials are both hashes with these keys:
  # * :aws_access_key_id
  # * :aws_secret_access_key
  # * :bucket
  # * :region (optional, defaults to DEFAULT_REGION)
  # path_prefix: only keys listed under this prefix are compared and copied
  # with_etag: when truthy, an object only counts as already synced if both
  #            its key and its etag match an entry in the destination listing
  def initialize(from_credentials, to_credentials, path_prefix, with_etag)
    @from_bucket = bucket_from_credentials(from_credentials)
    @to_bucket = bucket_from_credentials(to_credentials)
    @path = path_prefix
    @etag = with_etag
  end

  # Lists both buckets, copies every source object missing from the
  # destination listing and logs progress to +output+.
  #
  # output: anything Logger.new accepts (IO, filename, ...); defaults to STDOUT
  # Returns a hash { sync: <copied objects>, skip: <objects already present> }.
  # Any error raised while copying an object propagates to the caller.
  def perform(output = STDOUT)
    create_logger(output)
    logger.info "Starting sync from #{from_bucket.name}/#{path} to #{to_bucket.name}/#{path}"

    # get content from buckets
    logger.debug "get list from"
    from_objects = list_objects(from_bucket)
    logger.debug "get list to"
    to_objects = list_objects(to_bucket)

    # diff array: [key, etag] pairs present in the source but not in the target
    missing_objects = from_objects - to_objects
    object_counts = { sync: 0, skip: from_objects.size - missing_objects.size }
    logger.debug "diff: #{missing_objects.map(&:first)}"

    logger.debug "copy objects"
    # for each diff copy
    missing_objects.each do |key, _etag|
      sync(from_bucket.object(key))
      object_counts[:sync] += 1
    end

    logger.info "Done. Synced #{object_counts[:sync]}"
    object_counts
  end

  private

  # Builds the logger for this run; level is DEBUG when #debug is set,
  # INFO otherwise.
  def create_logger(output)
    @logger = Logger.new(output).tap do |l|
      l.level = debug ? Logger::DEBUG : Logger::INFO
    end
  end

  # Returns [key, etag] pairs for every object listed under #path in +bucket+.
  # The etag slot is nil when etag comparison is disabled, so the array
  # difference in #perform then compares keys only.
  def list_objects(bucket)
    bucket.objects(prefix: path).map { |obj| [obj.key, etag ? obj.etag : nil] }
  end

  # Copies a single source object into to_bucket under the same key.
  #
  # The destination bucket owner is temporarily granted read access on the
  # source object; that grant is withdrawn again in an ensure block so a
  # failed copy cannot leave the source object readable by the other account.
  # NOTE(review): both ACL writes pass only the listed grants, so any other
  # grants previously set on the source object are presumably dropped.
  def sync(object)
    # Block form: the description (which reads object metadata) is only built
    # when debug logging is actually enabled.
    logger.debug { "Syncing #{describe(object)}" }

    source_owner = "id=\"#{object.bucket.acl.owner.id}\""
    target_owner = "id=\"#{to_bucket.acl.owner.id}\""

    # let to_bucket read object
    object.acl.put(grant_read: target_owner, grant_full_control: source_owner)
    begin
      # pull object (needed in aws-sdk V2)
      to_bucket.object(object.key).copy_from(object)
    ensure
      # remove acl to read granted before
      object.acl.put(grant_full_control: source_owner)
    end
  end

  # One-line description of an object for the debug log: key, size in whole
  # KiB (integer division) and last-modified timestamp.
  def describe(object)
    content_length_in_kb = object.content_length / 1024
    "#{object.key} #{content_length_in_kb}k " +
      object.last_modified.strftime("%b %d %Y %H:%M")
  end

  # Returns the Aws::S3 bucket resource named by credentials[:bucket], using
  # the access keys (and optional :region) from the same hash.
  def bucket_from_credentials(credentials)
    s3 = Aws::S3::Resource.new(
      access_key_id: credentials[:aws_access_key_id],
      secret_access_key: credentials[:aws_secret_access_key],
      region: credentials.fetch(:region, DEFAULT_REGION)
    )
    bucket = s3.bucket(credentials[:bucket])
    # Bucket auto-creation is intentionally left disabled:
    # if !bucket.exists?
    #   bucket = s3.buckets.create( credentials[:bucket] )
    #   logger.info "Created bucket #{credentials[:bucket]}"
    # end
    bucket
  end
end
# Example run: fill in the access keys, bucket names and key prefix below.
# Guarded so the sync only starts when this file is executed directly, not
# when it is required by other code that just wants BucketSyncService.
if __FILE__ == $PROGRAM_NAME
  from_creds = { aws_access_key_id: "", aws_secret_access_key: "", bucket: "" }
  to_creds = { aws_access_key_id: "", aws_secret_access_key: "", bucket: "" }
  path = ''

  # true => also compare etags, not just keys
  syncer = BucketSyncService.new(from_creds, to_creds, path, true)
  syncer.debug = true # log each object
  syncer.perform
end
=begin
Install on AWS Debian 7 Wheezy (ami-61e56916):
  apt-get install rubygems1.9.1
  gem1.9.1 install aws-sdk
=end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment