Skip to content

Instantly share code, notes, and snippets.

@richardcalahan
Last active December 17, 2024 23:17
Show Gist options
  • Save richardcalahan/f61b14b35028e8e83156230bc69688f2 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# Stamp every object in an S3 bucket with a metadata key/value by copying
# each object onto itself, paging through the bucket $PAGE_SIZE keys at a
# time. Requires: aws CLI, jq.
set -euo pipefail  # fail fast on command errors, unset variables, and pipeline failures

readonly BUCKET="sgw-pd-t1"
readonly PAGE_SIZE=10
# Metadata key/value to add (or overwrite) on every object.
readonly NEW_META_KEY="file-permissions"
readonly NEW_META_VALUE="0777"
# May be set manually to resume after a script failure; a fresh token is
# printed for every page of $PAGE_SIZE keys.
NEXT_TOKEN=""
while :; do
  echo "Fetching objects..."

  # One list call yields both the page of keys and the pagination token.
  # (Previously a second, duplicate list-objects-v2 call was made just to
  # read NextToken, doubling API traffic and risking a token that did not
  # correspond to the page actually processed.)
  LIST_ARGS=(--bucket "$BUCKET" --max-items "$PAGE_SIZE" --output json)
  if [ -n "$NEXT_TOKEN" ]; then
    echo "Continuing with token: $NEXT_TOKEN"
    LIST_ARGS+=(--starting-token "$NEXT_TOKEN")
  fi
  PAGE=$(aws s3api list-objects-v2 "${LIST_ARGS[@]}")

  KEYS_JSON=$(echo "$PAGE" | jq -c '[.Contents[]?.Key]')
  NEXT_TOKEN=$(echo "$PAGE" | jq -r '.NextToken // empty')

  # Stop when the page contains no keys.
  if [ -z "$KEYS_JSON" ] || [ "$KEYS_JSON" == "[]" ]; then
    echo "No more objects found."
    break
  fi

  # Process each key. Process substitution (instead of a pipe) keeps the
  # loop in the current shell.
  while IFS= read -r KEY; do
    # Skip macOS AppleDouble sidecar entries (names starting with "._").
    if [[ "$KEY" == */._* || "$KEY" == ._* ]]; then
      echo "Skipping: $KEY (contains file or folder starting with ._)"
      continue
    fi
    echo "Processing: $KEY"

    # Step 1: one head-object supplies both current metadata and size
    # (previously two separate head-object calls per key).
    HEAD=$(aws s3api head-object --bucket "$BUCKET" --key "$KEY" --output json)
    EXISTING_METADATA=$(echo "$HEAD" | jq -c '.Metadata // {}')
    SIZE=$(echo "$HEAD" | jq -r '.ContentLength')

    META_VALUE=$(echo "$EXISTING_METADATA" | jq -r --arg key "$NEW_META_KEY" '.[$key] // empty')
    # Skip if the object already carries the desired value.
    if [ "$META_VALUE" == "$NEW_META_VALUE" ]; then
      echo "Skipping: Desired meta value already set. $META_VALUE."
      continue
    fi

    # Step 2: merge existing metadata with the new key/value (new key wins)
    # into the comma-separated "k=v,k=v" form expected by --metadata.
    # jq does the merge, avoiding the unquoted word-splitting loop.
    COMBINED_METADATA=$(echo "$EXISTING_METADATA" | jq -r \
      --arg k "$NEW_META_KEY" --arg v "$NEW_META_VALUE" \
      '(. + {($k): $v}) | to_entries | map("\(.key)=\(.value)") | join(",")')

    # Step 3: objects over 5 GB cannot be copied with a single PUT copy.
    if [ "$SIZE" -gt 5368709120 ]; then
      echo "File $KEY is larger than 5GB. Using multipart upload."
      UPLOAD_ID=$(aws s3api create-multipart-upload --bucket "$BUCKET" --key "$KEY" \
        --metadata "$COMBINED_METADATA" --query 'UploadId' --output text)
      echo "Initiated multipart upload: $UPLOAD_ID"

      # upload-part-copy is limited to 5 GiB per part, so a >5 GB object
      # must be copied in ranged parts via --copy-source-range. Each part's
      # ETag comes from the CopyPartResult in the upload-part-copy response;
      # the source object's own ETag is NOT valid for
      # complete-multipart-upload.
      PART_SIZE=$((4 * 1024 * 1024 * 1024))  # 4 GiB, safely under the 5 GiB part cap
      PARTS="[]"
      PART_NUMBER=1
      START=0
      while [ "$START" -lt "$SIZE" ]; do
        END=$((START + PART_SIZE - 1))
        if [ "$END" -ge "$SIZE" ]; then
          END=$((SIZE - 1))
        fi
        PART_ETAG=$(aws s3api upload-part-copy \
          --bucket "$BUCKET" \
          --key "$KEY" \
          --copy-source "$BUCKET/$KEY" \
          --copy-source-range "bytes=$START-$END" \
          --part-number "$PART_NUMBER" \
          --upload-id "$UPLOAD_ID" \
          --query 'CopyPartResult.ETag' --output text | tr -d '"')
        PARTS=$(echo "$PARTS" | jq -c \
          --argjson n "$PART_NUMBER" --arg e "$PART_ETAG" \
          '. + [{PartNumber: $n, ETag: $e}]')
        PART_NUMBER=$((PART_NUMBER + 1))
        START=$((END + 1))
      done

      aws s3api complete-multipart-upload --bucket "$BUCKET" --key "$KEY" \
        --upload-id "$UPLOAD_ID" --multipart-upload "{\"Parts\":$PARTS}"
      echo "Completed multipart upload for: $KEY"
    else
      # Regular self-copy with REPLACE rewrites the metadata in one call.
      echo "File $KEY is smaller than 5GB. Using regular copy."
      aws s3 cp "s3://$BUCKET/$KEY" "s3://$BUCKET/$KEY" \
        --metadata "$COMBINED_METADATA" --metadata-directive REPLACE
    fi
    echo "Updated metadata for: $KEY"
  done < <(echo "$KEYS_JSON" | jq -r '.[]')

  # No token on this page means it was the last one.
  if [ -z "$NEXT_TOKEN" ] || [ "$NEXT_TOKEN" == "None" ]; then
    break
  fi
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment