brysontyrrell · April 1, 2021 14:18
diff --git a/s3tester.py b/s3tester.py
 import hashlib
 import time

 import boto3

 # Shared S3 resource handle, created once when the module is imported.
 s3 = boto3.resource("s3")

 # Bucket that holds the test objects, and the keys to stream (in this order).
 BUCKET = "my-bucket"
 KEYS = ["test100mb.file", "test500mb.file", "test1gb.file", "test5gb.file"]

 # Number of bytes requested per read while streaming (20,000,000 = 20 MB).
 CHUNK_SIZE = 20_000_000


 def lambda_handler(event, context):
    """Stream and hash every object listed in ``KEYS``, one after another.

    Presumably the AWS Lambda entry point (the name and signature follow that
    convention); ``event`` and ``context`` are accepted but never read.
    Progress markers are printed before the first and after the last object.
    Returns ``None``.
    """
    print("Starting...")
    for object_key in KEYS:
        stream_file(object_key)
    print("Complete")


 def stream_file(key):
    """Download one object from ``BUCKET``, SHA-1 hash it, and print the timing.

    Args:
        key: Key of the object inside ``BUCKET`` to fetch and hash.

    The body is consumed through ``read_in_chunks`` and fed to the hash
    incrementally. Nothing is returned; a single line with the key, the hex
    digest and the elapsed seconds (rounded to two decimals) is printed.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    hash_digest = hashlib.sha1()
    s3_object = s3.Object(bucket_name=BUCKET, key=key).get()

    for chunk in read_in_chunks(s3_object):
        hash_digest.update(chunk)

    elapsed_time = time.perf_counter() - start_time
    print(
        f"File {key} - SHA1 {hash_digest.hexdigest()} - Total Seconds: {round(elapsed_time, 2)}"
    )


 def read_in_chunks(s3_object: dict, chunk_size=None):
    """Yield the body of an S3 ``get()`` response in fixed-size chunks.

    Args:
        s3_object: Response mapping whose ``"Body"`` entry is a file-like
            object exposing ``read(amt)``.
        chunk_size: Maximum number of bytes per chunk. When omitted, the
            module-level ``CHUNK_SIZE`` is used.

    Yields:
        Non-empty chunks in stream order until the body is exhausted; the
        final chunk may be shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative (raised when the
            generator is first advanced).
    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    if chunk_size <= 0:
        # read(0) returns an empty result at once, which would be
        # indistinguishable from an empty object; fail loudly instead.
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    # Go through the body's public read() rather than its private
    # ``_raw_stream`` attribute, which is an implementation detail that
    # bypasses whatever bookkeeping the wrapper performs on each read.
    body = s3_object["Body"]
    while True:
        data = body.read(chunk_size)
        if not data:
            break
        yield data
	import hashlib
	import time

	import boto3

	# Shared S3 resource handle, created once when the module is imported.
	s3 = boto3.resource("s3")

	# Bucket that holds the test objects, and the keys to stream (in this order).
	BUCKET = "my-bucket"
	KEYS = ["test100mb.file", "test500mb.file", "test1gb.file", "test5gb.file"]

	# Number of bytes requested per read while streaming (20,000,000 = 20 MB).
	CHUNK_SIZE = 20_000_000


	def lambda_handler(event, context):
	    """Stream and hash every object listed in ``KEYS``, one after another.

	    Presumably the AWS Lambda entry point (the name and signature follow
	    that convention); ``event`` and ``context`` are accepted but never read.
	    Progress markers are printed before the first and after the last object.
	    Returns ``None``.
	    """
	    print("Starting...")
	    for object_key in KEYS:
	        stream_file(object_key)
	    print("Complete")


	def stream_file(key):
	    """Download one object from ``BUCKET``, SHA-1 hash it, and print the timing.

	    Args:
	        key: Key of the object inside ``BUCKET`` to fetch and hash.

	    The body is consumed through ``read_in_chunks`` and fed to the hash
	    incrementally. Nothing is returned; a single line with the key, the hex
	    digest and the elapsed seconds (rounded to two decimals) is printed.
	    """
	    # perf_counter() is monotonic, so the measured duration cannot be skewed
	    # by system clock adjustments the way a time.time() difference can.
	    start_time = time.perf_counter()
	    hash_digest = hashlib.sha1()
	    s3_object = s3.Object(bucket_name=BUCKET, key=key).get()

	    for chunk in read_in_chunks(s3_object):
	        hash_digest.update(chunk)

	    elapsed_time = time.perf_counter() - start_time
	    print(
	        f"File {key} - SHA1 {hash_digest.hexdigest()} - Total Seconds: {round(elapsed_time, 2)}"
	    )


	def read_in_chunks(s3_object: dict, chunk_size=None):
	    """Yield the body of an S3 ``get()`` response in fixed-size chunks.

	    Args:
	        s3_object: Response mapping whose ``"Body"`` entry is a file-like
	            object exposing ``read(amt)``.
	        chunk_size: Maximum number of bytes per chunk. When omitted, the
	            module-level ``CHUNK_SIZE`` is used.

	    Yields:
	        Non-empty chunks in stream order until the body is exhausted; the
	        final chunk may be shorter than ``chunk_size``.

	    Raises:
	        ValueError: If ``chunk_size`` is zero or negative (raised when the
	            generator is first advanced).
	    """
	    if chunk_size is None:
	        chunk_size = CHUNK_SIZE
	    if chunk_size <= 0:
	        # read(0) returns an empty result at once, which would be
	        # indistinguishable from an empty object; fail loudly instead.
	        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

	    # Go through the body's public read() rather than its private
	    # ``_raw_stream`` attribute, which is an implementation detail that
	    # bypasses whatever bookkeeping the wrapper performs on each read.
	    body = s3_object["Body"]
	    while True:
	        data = body.read(chunk_size)
	        if not data:
	            break
	        yield data