@huevos-y-bacon
Created October 25, 2024 11:44
AWS S3 - Bucket Clone and Diff
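Two small Python scripts: the first clones a bucket by copying every object from a source bucket to a destination bucket in parallel, and the second diffs two buckets by comparing object keys, sizes, and ETags (last-modified timestamps are ignored).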
#!/usr/bin/python
# -*- coding: ASCII -*-
#
# S3 Copy Bucket - Copy all objects of an S3 bucket
#
# Copyright (c) 2022 Carsten Grohmann
# License: MIT (see LICENSE.txt)
# THIS PROGRAM COMES WITH NO WARRANTY

import boto3
import sys
from multiprocessing import Pool

# if sys.version_info[0] == 2:
#     from __future__ import print_function
#     input = raw_input

bucket_src_name = sys.argv[1]
bucket_dest_name = sys.argv[2]

session = boto3.session.Session()
s3client = session.client('s3')


def copy(key):
    """Copy a single object; return the key on failure, None on success."""
    try:
        s3client.copy_object(
            Bucket=bucket_dest_name,
            Key=key,
            CopySource={'Bucket': bucket_src_name, 'Key': key},
            MetadataDirective='COPY',
        )
    except Exception as e:
        print("Exception occurred for key %s: %s" % (key, e))
        return key
    print("Copied object: %s" % key)


if __name__ == "__main__":
    print("Query objects information from s3://%s" % bucket_src_name)

    # fetch all object keys from the source bucket
    paginator = s3client.get_paginator('list_objects_v2')
    results = paginator.paginate(Bucket=bucket_src_name).build_full_result()
    keys = [i['Key'] for i in results.get('Contents', [])]

    print("Copy %d objects from s3://%s to s3://%s" % (len(keys), bucket_src_name, bucket_dest_name))
    input("Press Enter to continue or CTRL-C to abort ...")

    # copy objects with 10 worker processes
    p = Pool(10)
    res = p.map(copy, keys)

    # report keys that failed to copy
    for k in res:
        if not k:
            continue
        print("Failed copy: %s" % k)
#!/usr/bin/env python3
import boto3
import sys

bucket_src_name = sys.argv[1]
bucket_dest_name = sys.argv[2]


def get_bucket_objects(bucket_name):
    """Retrieve all objects from an S3 bucket, excluding timestamps."""
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    objects = {}
    for page in paginator.paginate(Bucket=bucket_name):
        for obj in page.get('Contents', []):
            objects[obj['Key']] = {
                'Size': obj['Size'],
                'ETag': obj['ETag'],
            }
    return objects


def compare_buckets(bucket1_name, bucket2_name):
    """Compare the contents of two S3 buckets, ignoring timestamps."""
    bucket1_objects = get_bucket_objects(bucket1_name)
    bucket2_objects = get_bucket_objects(bucket2_name)

    # Find objects that are only in one of the buckets
    only_in_bucket1 = set(bucket1_objects) - set(bucket2_objects)
    only_in_bucket2 = set(bucket2_objects) - set(bucket1_objects)

    # Find objects that are in both buckets but differ in size or ETag
    common_keys = bucket1_objects.keys() & bucket2_objects.keys()
    in_both_buckets_differ_size = [
        key for key in common_keys
        if bucket1_objects[key]['Size'] != bucket2_objects[key]['Size']
    ]
    in_both_buckets_differ_etag = [
        key for key in common_keys
        if bucket1_objects[key]['ETag'] != bucket2_objects[key]['ETag']
    ]

    print(f"Objects only in {bucket1_name}: {only_in_bucket1}")
    print(f"Objects only in {bucket2_name}: {only_in_bucket2}")
    print(f"Objects in both buckets but differ in size: {in_both_buckets_differ_size}")
    print(f"Objects in both buckets but differ in etag: {in_both_buckets_differ_etag}")


if __name__ == "__main__":
    print(f"Comparing objects in s3 between {bucket_src_name} and {bucket_dest_name}\n")
    compare_buckets(bucket_src_name, bucket_dest_name)
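Note: S3 ETags are plain MD5 digests only for objects uploaded in a single PUT; multipart uploads produce ETags of the form "<hash>-<part count>", so identical content can still show an ETag mismatch between buckets. A small sketch of a helper for filtering such keys out of the ETag report; the function names are illustrative and not part of the gist:

def is_multipart_etag(etag):
    # Multipart-upload ETags look like "<hash>-<part count>", so a mismatch
    # on these keys does not necessarily mean the content differs.
    return '-' in etag.strip('"')

def reliable_etag_mismatches(bucket1_objects, bucket2_objects, differing_keys):
    # Keep only keys where both ETags are plain MD5 digests, i.e. where an
    # ETag mismatch is a meaningful signal of differing content.
    return [
        key for key in differing_keys
        if not is_multipart_etag(bucket1_objects[key]['ETag'])
        and not is_multipart_etag(bucket2_objects[key]['ETag'])
    ]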