Last active
March 20, 2026 16:58
-
-
Save minrk/cae98fd28a9d23903a2321ceb775649a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image for the S3 bulk-delete job: slim Python 3.13 base plus boto3,
# the only third-party package s3rmrf.py imports.
| FROM python:3.13-slim | |
| RUN pip install boto3 | |
# Bake a copy of the script into the image.
# NOTE(review): the Job manifest in this gist runs /s3rmrf/s3rmrf.py from a
# ConfigMap mount rather than this copy - confirm which one is intended.
| COPY s3rmrf.py /usr/local/bin/s3rmrf.py |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Kubernetes batch Job that runs s3rmrf.py against one bucket and key prefix.
| apiVersion: batch/v1 | |
| kind: Job | |
| metadata: | |
| name: s3-delete | |
| spec: | |
# A single pod at a time.
| parallelism: 1 | |
| template: | |
| metadata: | |
| labels: | |
| app: deleter | |
| spec: | |
# The container is restarted whenever the script exits with a failure.
| restartPolicy: OnFailure | |
| containers: | |
| - name: rmrf | |
| image: ghcr.io/minrk/mybinder-s3rmrf | |
# The script is read from the ConfigMap mount declared below (/s3rmrf).
# Its two positional arguments are the bucket name and the key prefix
# whose objects are deleted.
| command: | |
| - python3 | |
| - /s3rmrf/s3rmrf.py | |
| - mybinder-2i2c-registry | |
| - docker/ | |
| env: | |
# Unbuffered stdout so the script's progress lines reach the pod logs promptly.
| - name: PYTHONUNBUFFERED | |
| value: "1" | |
# Non-AWS, S3-compatible endpoint.
# NOTE(review): presumably picked up by boto3 from the environment -
# confirm the installed boto3 version honours AWS_ENDPOINT_URL.
| - name: AWS_ENDPOINT_URL | |
| value: https://nbg1.your-objectstorage.com | |
# Access key and secret key both come from the
# hetzner-2i2c-harbor-registry Secret.
| - name: AWS_ACCESS_KEY_ID | |
| valueFrom: | |
| secretKeyRef: | |
| name: hetzner-2i2c-harbor-registry | |
| key: REGISTRY_STORAGE_S3_ACCESSKEY | |
| - name: AWS_SECRET_ACCESS_KEY | |
| valueFrom: | |
| secretKeyRef: | |
| name: hetzner-2i2c-harbor-registry | |
| key: REGISTRY_STORAGE_S3_SECRETKEY | |
| volumeMounts: | |
| - mountPath: /s3rmrf | |
| name: s3rmrf | |
| readOnly: true | |
# ConfigMap holding the script; per the script's docstring it is created with
#   kubectl create configmap s3rmrf --from-file ./s3rmrf.py
| volumes: | |
| - name: s3rmrf | |
| configMap: | |
| name: s3rmrf |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| delete all s3 files in a prefix | |
| way faster than aws s3 rm --recursive, which doesn't seem to do bulk delete for some reason | |
| 2 api requests per 1000 files | |
| add to job with | |
| kubectl create configmap s3rmrf --from-file ./s3rmrf.py | |
| """ | |
| import pprint | |
| import time | |
| from datetime import timedelta | |
| import boto3 | |
def main(bucket, prefix, *, s3=None):
    """Delete every object under ``prefix`` in ``bucket`` using bulk deletes.

    Each iteration lists one page of keys under the prefix and removes them
    with a single ``delete_objects`` call, then prints a progress line with
    the elapsed time, running total and average rate. The loop ends when a
    listing comes back empty.

    Args:
        bucket: Name of the bucket to delete from.
        prefix: Key prefix; only keys starting with it are deleted.
        s3: Optional client object exposing ``list_objects_v2`` and
            ``delete_objects``. When omitted, ``boto3.client("s3")`` is used.

    Raises:
        ValueError: If a listed key does not start with ``prefix``.
        RuntimeError: If a delete request removes nothing, which would
            otherwise make the loop re-list the same keys forever.
    """
    if s3 is None:
        s3 = boto3.client("s3")
    start = time.perf_counter()
    total = 0
    while True:
        listing = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
        # "Contents" is absent from the response once nothing matches the
        # prefix; that is the normal, successful end of the run.
        objects = listing.get("Contents", [])
        if not objects:
            break

        # Safety net: never send a key outside the requested prefix.
        for obj in objects:
            if not obj['Key'].startswith(prefix):
                raise ValueError(f"Not deleting! {obj}")

        to_delete = [{'Key': obj['Key']} for obj in objects]
        response = s3.delete_objects(Bucket=bucket, Delete=dict(Objects=to_delete))
        # "Deleted" may be missing when every key in the batch failed.
        deleted = response.get('Deleted', [])
        total += len(deleted)
        # Anything besides the two expected keys (e.g. per-key errors) is
        # dumped in full so it shows up in the logs.
        if set(response.keys()) != {"Deleted", "ResponseMetadata"}:
            pprint.pprint(response)
        if not deleted:
            raise RuntimeError(
                f"No objects deleted out of {len(to_delete)} requested; "
                "aborting to avoid looping forever"
            )

        duration = time.perf_counter() - start
        td = timedelta(seconds=int(duration))
        # Guard against a zero elapsed time on coarse timers.
        rate = total / duration if duration > 0 else 0.0
        print(f"{td} {total=}, rate={rate:.0f}/s")
if __name__ == "__main__":
    # Command-line entry point: two required positionals, bucket then prefix.
    import argparse

    cli = argparse.ArgumentParser()
    for positional in ("bucket", "prefix"):
        cli.add_argument(positional)
    args = cli.parse_args()
    main(bucket=args.bucket, prefix=args.prefix)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment