@brysontyrrell
Created April 1, 2021 14:18
Code used for a Lambda function testing S3 object read speeds.
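The benchmark assumes the four objects named in KEYS already exist in BUCKET. A minimal sketch for creating them with boto3, assuming the sizes implied by the key names (the RandomBytes helper is hypothetical and not part of the original gist; the actual test files may have been produced differently):

import os

import boto3

s3 = boto3.resource("s3")


class RandomBytes:
    """Hypothetical helper: a non-seekable file-like object of pseudo-random bytes."""

    def __init__(self, size):
        self.remaining = size

    def read(self, amt=-1):
        if self.remaining <= 0:
            return b""
        n = self.remaining if amt is None or amt < 0 else min(amt, self.remaining)
        self.remaining -= n
        return os.urandom(n)


# Sizes assumed from the key names; the originals may differ.
for name, size in [
    ("test100mb.file", 100 * 1024**2),
    ("test500mb.file", 500 * 1024**2),
    ("test1gb.file", 1024**3),
    ("test5gb.file", 5 * 1024**3),
]:
    s3.Object("my-bucket", name).upload_fileobj(RandomBytes(size))

The Lambda function itself: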
import hashlib
import time

import boto3

s3 = boto3.resource("s3")

BUCKET = "my-bucket"
KEYS = [
    "test100mb.file",
    "test500mb.file",
    "test1gb.file",
    "test5gb.file",
]

# Read each object in 20 MB chunks.
CHUNK_SIZE = 20_000_000


def lambda_handler(event, context):
    print("Starting...")
    for key in KEYS:
        stream_file(key)
    print("Complete")


def stream_file(key):
    """Stream an object from S3, hashing it as it downloads, and report the elapsed time."""
    start_time = time.time()
    hash_digest = hashlib.sha1()
    s3_object = s3.Object(bucket_name=BUCKET, key=key).get()

    for chunk in read_in_chunks(s3_object):
        hash_digest.update(chunk)

    elapsed_time = time.time() - start_time
    print(
        f"File {key} - SHA1 {hash_digest.hexdigest()} - Total Seconds: {round(elapsed_time, 2)}"
    )


def read_in_chunks(s3_object: dict):
    """A generator that iterates over an S3 object in 20 MB chunks.

    Reads directly from the underlying urllib3 stream rather than through
    botocore's StreamingBody wrapper.
    """
    stream = s3_object["Body"]._raw_stream
    while True:
        data = stream.read(CHUNK_SIZE)
        if not data:
            break
        yield data
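Note that read_in_chunks reaches into _raw_stream, a private attribute of botocore's StreamingBody, presumably to skip the wrapper's bookkeeping; that attribute is not part of the public API and could change between botocore releases. A sketch of the same loop against the public StreamingBody.iter_chunks method, assuming its behavior is equivalent for this benchmark aside from any wrapper overhead:

def read_in_chunks_public(s3_object: dict):
    """Hypothetical alternative: yield the body in CHUNK_SIZE chunks via the public API."""
    # iter_chunks() yields byte chunks until the stream is exhausted.
    yield from s3_object["Body"].iter_chunks(chunk_size=CHUNK_SIZE)

For a quick run outside Lambda, calling lambda_handler({}, None) locally with AWS credentials configured exercises the same code path.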