Last active
December 17, 2024 23:17
-
-
Save richardcalahan/f61b14b35028e8e83156230bc69688f2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Bulk-update S3 object metadata: ensure every object in $BUCKET carries
# NEW_META_KEY=NEW_META_VALUE while preserving all other existing metadata.
# Objects are listed in pages of PAGE_SIZE; the continuation token is printed
# each page so a failed run can be resumed by setting NEXT_TOKEN manually.
set -uo pipefail   # no -e: per-object failures are reported and skipped

BUCKET="sgw-pd-t1"
PAGE_SIZE=10

# Metadata key/value to add or update on every object.
NEW_META_KEY="file-permissions"
NEW_META_VALUE="0777"

# 5 GiB: the max size of a single-request copy AND of one multipart part.
readonly MAX_PART_SIZE=5368709120

# Can be set manually to resume after a failure; a new token is printed
# every PAGE_SIZE objects.
NEXT_TOKEN=""

while : ; do
  echo "Fetching objects..."

  # One list call per page yields BOTH the keys and the continuation token.
  # (The original issued a second, duplicate list-objects-v2 call per page
  # just to read NextToken — doubling API traffic for no benefit.)
  LIST_ARGS=(--bucket "$BUCKET" --max-items "$PAGE_SIZE" --output json)
  if [ -n "$NEXT_TOKEN" ]; then
    echo "Continuing with token: $NEXT_TOKEN"
    LIST_ARGS+=(--starting-token "$NEXT_TOKEN")
  fi
  PAGE=$(aws s3api list-objects-v2 "${LIST_ARGS[@]}") || {
    echo "list-objects-v2 failed" >&2
    exit 1
  }

  KEYS=$(echo "$PAGE" | jq -r '.Contents[]?.Key // empty')
  if [ -z "$KEYS" ]; then
    echo "No more objects found."
    break
  fi

  # Process each key. Feeding the loop via a here-string keeps it in the
  # current shell (a `cmd | while` pipeline would run it in a subshell).
  while IFS= read -r KEY; do
    # Skip AppleDouble resource-fork entries (._*) anywhere in the path.
    case "$KEY" in
      */._*|._*)
        echo "Skipping: $KEY (contains file or folder starting with ._)"
        continue
        ;;
    esac

    echo "Processing: $KEY"

    # Step 1: fetch existing metadata; skip if the object is already correct.
    EXISTING_METADATA=$(aws s3api head-object --bucket "$BUCKET" --key "$KEY" \
      --query 'Metadata' --output json) || {
      echo "head-object failed for $KEY — skipping" >&2
      continue
    }
    META_VALUE=$(echo "$EXISTING_METADATA" | jq -r --arg key "$NEW_META_KEY" '.[$key] // empty')
    if [ "$META_VALUE" == "$NEW_META_VALUE" ]; then
      echo "Skipping: Desired meta value already set. $META_VALUE."
      continue
    fi

    # Step 2: rebuild the metadata map as key=value,... shorthand with the
    # target key added/overwritten. NOTE(review): values containing ',' or
    # '=' would corrupt this shorthand — assumed absent in this bucket.
    COMBINED_METADATA=$(echo "$EXISTING_METADATA" | jq -r \
      --arg k "$NEW_META_KEY" --arg v "$NEW_META_VALUE" \
      '. + {($k): $v} | to_entries | map("\(.key)=\(.value)") | join(",")')

    # Step 3: copy the object onto itself with the new metadata.
    SIZE=$(aws s3api head-object --bucket "$BUCKET" --key "$KEY" \
      --query 'ContentLength' --output text) || {
      echo "head-object (size) failed for $KEY — skipping" >&2
      continue
    }

    if [ "$SIZE" -gt "$MAX_PART_SIZE" ]; then
      echo "File $KEY is larger than 5GB. Using multipart copy."
      UPLOAD_ID=$(aws s3api create-multipart-upload --bucket "$BUCKET" --key "$KEY" \
        --metadata "$COMBINED_METADATA" --query 'UploadId' --output text)
      echo "Initiated multipart upload: $UPLOAD_ID"

      # Copy in <=5GiB ranges: upload-part-copy is itself capped at 5 GiB
      # per part, so a single-part copy of a >5GB object cannot succeed.
      PART=1
      OFFSET=0
      PARTS_JSON='{"Parts":[]}'
      while [ "$OFFSET" -lt "$SIZE" ]; do
        END=$((OFFSET + MAX_PART_SIZE - 1))
        if [ "$END" -ge "$SIZE" ]; then
          END=$((SIZE - 1))
        fi
        # The ETag for complete-multipart-upload MUST come from the
        # upload-part-copy response (CopyPartResult.ETag) — using the
        # source object's ETag is rejected by S3 with InvalidPart.
        ETAG=$(aws s3api upload-part-copy \
          --bucket "$BUCKET" \
          --key "$KEY" \
          --copy-source "$BUCKET/$KEY" \
          --copy-source-range "bytes=$OFFSET-$END" \
          --part-number "$PART" \
          --upload-id "$UPLOAD_ID" \
          --query 'CopyPartResult.ETag' --output text | tr -d '"')
        PARTS_JSON=$(echo "$PARTS_JSON" | jq --argjson n "$PART" --arg e "$ETAG" \
          '.Parts += [{"PartNumber": $n, "ETag": $e}]')
        PART=$((PART + 1))
        OFFSET=$((END + 1))
      done

      aws s3api complete-multipart-upload --bucket "$BUCKET" --key "$KEY" \
        --upload-id "$UPLOAD_ID" --multipart-upload "$PARTS_JSON"
      echo "Completed multipart upload for: $KEY"
    else
      # Objects <= 5GB can be rewritten with a single server-side copy.
      echo "File $KEY is smaller than 5GB. Using regular copy."
      aws s3 cp "s3://$BUCKET/$KEY" "s3://$BUCKET/$KEY" \
        --metadata "$COMBINED_METADATA" --metadata-directive REPLACE
    fi
    echo "Updated metadata for: $KEY"
  done <<< "$KEYS"

  # Step 4: continuation token from the SAME page response; stop when absent.
  NEXT_TOKEN=$(echo "$PAGE" | jq -r '.NextToken // empty' | tr -d '\n')
  if [ -z "$NEXT_TOKEN" ] || [ "$NEXT_TOKEN" == "None" ]; then
    break
  fi
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment