Code for a Lambda function that tests S3 object read speeds.
import hashlib
import time

import boto3

s3 = boto3.resource("s3")

BUCKET = "my-bucket"
KEYS = [
    "test100mb.file",
    "test500mb.file",
    "test1gb.file",
    "test5gb.file",
]
# Size of each read from the response stream: 20 MB.
CHUNK_SIZE = 20_000_000


def lambda_handler(event, context):
    print("Starting...")
    for key in KEYS:
        stream_file(key)
    print("Complete")


def stream_file(key):
    """Download an S3 object in chunks, hashing as it streams, and report the time taken."""
    start_time = time.time()
    hash_digest = hashlib.sha1()

    s3_object = s3.Object(bucket_name=BUCKET, key=key).get()

    for chunk in read_in_chunks(s3_object):
        hash_digest.update(chunk)

    elapsed_time = time.time() - start_time
    print(
        f"File {key} - SHA1 {hash_digest.hexdigest()} - Total Seconds: {round(elapsed_time, 2)}"
    )


def read_in_chunks(s3_object: dict):
    """A generator that iterates over an S3 object in 20 MB chunks.

    Reads from the private ``_raw_stream`` attribute to pull bytes straight
    from the underlying urllib3 response rather than through botocore's
    StreamingBody wrapper.
    """
    stream = s3_object["Body"]._raw_stream
    while True:
        data = stream.read(CHUNK_SIZE)
        if not data:
            break
        yield data
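read_in_chunks reaches into StreamingBody's private _raw_stream attribute, presumably to read straight from the underlying urllib3 response. If you'd rather stay on botocore's public API, StreamingBody.iter_chunks is a drop-in replacement for the loop in stream_file (a minimal sketch; results should be equivalent, though timings may differ slightly since the wrapper adds its own bookkeeping):

# Public-API equivalent of read_in_chunks, reading the same 20 MB chunks
# through botocore's StreamingBody instead of the raw urllib3 stream.
for chunk in s3_object["Body"].iter_chunks(CHUNK_SIZE):
    hash_digest.update(chunk)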
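The handler assumes the four test objects already exist in the bucket. A rough sketch for creating and uploading them, not part of the original gist: the object names and sizes mirror the KEYS list above, the bucket name is the same placeholder, and it assumes enough local disk space for the largest file plus write access to the bucket.

import os

import boto3

s3 = boto3.resource("s3")

BUCKET = "my-bucket"
MB = 1000 * 1000
SIZES = {
    "test100mb.file": 100 * MB,
    "test500mb.file": 500 * MB,
    "test1gb.file": 1000 * MB,
    "test5gb.file": 5000 * MB,
}

for name, size in SIZES.items():
    # Write the file locally in 20 MB blocks of random data.
    with open(name, "wb") as f:
        remaining = size
        while remaining > 0:
            block = os.urandom(min(20 * MB, remaining))
            f.write(block)
            remaining -= len(block)
    # Bucket.upload_file uses boto3's managed transfer, which switches to
    # multipart uploads automatically for large files.
    s3.Bucket(BUCKET).upload_file(name, name)
    os.remove(name)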