Skip to content

Instantly share code, notes, and snippets.

@metadaddy
Last active October 6, 2024 23:10
Show Gist options
  • Save metadaddy/27ce7f68edcd09454407c7494d146cef to your computer and use it in GitHub Desktop.
Save metadaddy/27ce7f68edcd09454407c7494d146cef to your computer and use it in GitHub Desktop.
Find most recently uploaded file in a Backblaze B2 bucket (with optional prefix)
import argparse
from datetime import datetime, MINYEAR, timezone
import boto3
from dotenv import load_dotenv
def list_objects(client, bucket_name, prefix=''):
    """
    Generator yielding the objects in a bucket one at a time, making paginated
    API calls as necessary rather than reading the entire listing into memory.

    Adapted from https://stackoverflow.com/a/54014862/33905

    Note: this uses ListObjectsV2, which returns only the *current* objects
    in the bucket, not every object version.

    :param client: a boto3 S3 client object
    :param bucket_name: the bucket name
    :param prefix: an optional key prefix to filter on; defaults to ''
    :return: yields one object dict (with 'Key', 'LastModified', etc.) at a time
    """
    s3_paginator = client.get_paginator('list_objects_v2')
    for page in s3_paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # A page with no matching objects has no 'Contents' key at all,
        # so default to an empty tuple.
        for obj in page.get('Contents', ()):
            yield obj
def main():
    """
    Find the most recently uploaded object in a Backblaze B2 (S3-compatible)
    bucket, optionally restricted to a key prefix, and print its key and
    last-modified time. Credentials and endpoint are read from a .env file.
    """
    parser = argparse.ArgumentParser(description='Find most recent upload to a bucket')
    parser.add_argument('bucket', type=str, nargs=1, help='a bucket name')
    parser.add_argument('prefix', type=str, nargs='?', default='', help='a prefix within the bucket')
    args = parser.parse_args()

    bucket = args.bucket[0]
    # S3 keys do not start with '/'; strip a single leading slash so that
    # 'photos/' and '/photos/' behave the same on the command line.
    prefix = args.prefix[1:] if args.prefix.startswith('/') else args.prefix

    # Never put credentials in source code!
    # Create a file in this directory, named .env, with contents:
    # AWS_ACCESS_KEY_ID=your application key id
    # AWS_SECRET_ACCESS_KEY=your application key
    # AWS_REGION=region from your bucket endpoint, e.g. us-west-004
    # AWS_ENDPOINT_URL=your bucket endpoint, e.g. https://s3.us-west-004.backblazeb2.com
    load_dotenv()
    client = boto3.client('s3')

    # Track the newest object seen so far; start from the earliest
    # representable aware datetime so any real object compares greater.
    max_last_modified_time = datetime(MINYEAR, 1, 1, tzinfo=timezone.utc)
    max_last_modified_key = ''
    for obj in list_objects(client, bucket, prefix=prefix):
        if obj['LastModified'] > max_last_modified_time:
            max_last_modified_time = obj['LastModified']
            max_last_modified_key = obj['Key']

    path = bucket if len(prefix) == 0 else f'{bucket}/{prefix}'
    # An empty key means the listing yielded nothing at all.
    if max_last_modified_key:
        print(f'Last upload to {path} was {max_last_modified_key} at {max_last_modified_time}')
    else:
        print(f'No files in {path}')


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment