#!/bin/bash
bucket=$1
set -e
echo "Removing all versions from $bucket"
versions=`aws s3api list-object-versions --bucket $bucket | jq '.Versions'`
markers=`aws s3api list-object-versions --bucket $bucket | jq '.DeleteMarkers'`
let count=`echo $versions | jq 'length'`-1
if [ $count -gt -1 ]; then
    echo "removing files"
    for i in $(seq 0 $count); do
        key=`echo $versions | jq .[$i].Key | sed -e 's/\"//g'`
        versionId=`echo $versions | jq .[$i].VersionId | sed -e 's/\"//g'`
        cmd="aws s3api delete-object --bucket $bucket --key $key --version-id $versionId"
        echo $cmd
        $cmd
    done
fi
let count=`echo $markers | jq 'length'`-1
if [ $count -gt -1 ]; then
    echo "removing delete markers"
    for i in $(seq 0 $count); do
        key=`echo $markers | jq .[$i].Key | sed -e 's/\"//g'`
        versionId=`echo $markers | jq .[$i].VersionId | sed -e 's/\"//g'`
        cmd="aws s3api delete-object --bucket $bucket --key $key --version-id $versionId"
        echo $cmd
        $cmd
    done
fi
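For reference, a typical invocation looks like this (a sketch; the script filename and bucket name are placeholders):
# delete every object version and delete marker in the bucket
./delete-all-versions.sh my-versioned-bucket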
Note: unless the IAM policy attached to your role includes "s3:DeleteObjectVersion", none of the version deletions will work.
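The relevant policy statement looks roughly like this (a sketch only; the file name and bucket ARN are placeholders, and you should scope the resource to your own bucket):
# minimal statement granting versioned listing and deletes on one bucket
cat > delete-versions-policy.json << 'EOF'
{
  "Version": "2012-10-17",
  "Statement": [{
    "Effect": "Allow",
    "Action": ["s3:ListBucketVersions", "s3:DeleteObject", "s3:DeleteObjectVersion"],
    "Resource": ["arn:aws:s3:::my-versioned-bucket", "arn:aws:s3:::my-versioned-bucket/*"]
  }]
}
EOF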
Thanks @nashjain ... here is my version based on yours :)
(echo -n '{"Objects":';aws s3api list-object-versions --bucket "$bucket" --prefix "$prefix" --max-items 1000 --query "Versions[?(LastModified<'2020-07-21')].{Key: Key, VersionId: VersionId}" | sed 's#]$#] , "Quiet":true}#') > _TMP_DELETE && aws s3api delete-objects --bucket "$bucket" --delete file://_TMP_DELETE
To do 1000 at a time.
I found I could put this in a loop and get through about 3 iterations (roughly 3k objects) a minute. So I produced this script, which downloads 10k objects, then uses jq to slice off 1k at a time for deletion, looping 4k times. That gets up to around 4.5k objects a minute.
bucket=_BUCKET_NAME_
prefix=_PREFIX_
cnt=0
FN=/tmp/_TMP_DELETE
rm $FN 2> /dev/null
while [ $cnt -lt 4000 ]
do
aws s3api list-object-versions --bucket "$bucket" --prefix "$prefix" --max-items 10000 --query "Versions[?(LastModified<'2019-07-21')].{Key: Key, VersionId: VersionId}" > $FN
rm $FN.upload 2> /dev/null
s=0
while [ $s -lt 10000 ]
do
((e=s+1000)) # jq slices exclude the end index, so .[s:s+1000] is a full batch of 1000
#echo taking $s to $e
(echo -n '{"Objects":';jq ".[$s:$e]" < $FN | sed 's#]$#] , "Quiet":true}#') > $FN.upload
aws s3api delete-objects --bucket "$bucket" --delete file://$FN.upload && rm $FN.upload
((s=e))
#echo s is $s and e is $e
echo -n "."
done
((cnt++))
((tot=cnt*10))
echo on run $cnt total deleted ${tot}k objects
done
Okay ... faster still (~10k/min) - just dump everything into a file, then:
bucket=_BUCKET_
prefix=_PREFIX_
SRCFN=_DUMP_FILE_
FN=/tmp/_TMP_DELETE
aws s3api list-object-versions --bucket "$bucket" --prefix "$prefix" --query "Versions[?(LastModified<'2019-07-21')].{Key: Key, VersionId: VersionId}" > $SRCFN
rm $FN 2> /dev/null
s=0
c=`grep -c VersionId $SRCFN`
while [ $s -lt $c ]
do
((e=s+1000)) # exclusive end index: .[s:s+1000] takes exactly 1000 items
echo taking $s to $e
(echo -n '{"Objects":';jq ".[$s:$e]" < $SRCFN | sed 's#]$#] , "Quiet":true}#') > $FN
aws s3api delete-objects --bucket "$bucket" --delete file://$FN && rm $FN
((s=e))
sleep 1
#echo s is $s and e is $e
#echo -n "."
done
Yet another minor update, to fix the issue where the key (file name) contains spaces:
#!/bin/bash
bucket=$1
set -e
echo "Removing all versions from $bucket"
versions=`aws s3api list-object-versions --bucket $bucket | jq '.Versions'`
markers=`aws s3api list-object-versions --bucket $bucket | jq '.DeleteMarkers'`
let count=`echo $versions | jq 'length'`-1
if [ $count -gt -1 ]; then
    echo "removing files"
    for i in $(seq 0 $count); do
        key=`echo $versions | jq .[$i].Key | sed -e 's/\"//g'`
        versionId=`echo $versions | jq .[$i].VersionId | sed -e 's/\"//g'`
        cmd="aws s3api delete-object --bucket $bucket --key \"$key\" --version-id $versionId"
        echo $cmd
        eval $cmd
    done
fi
let count=`echo $markers | jq 'length'`-1
if [ $count -gt -1 ]; then
    echo "removing delete markers"
    for i in $(seq 0 $count); do
        key=`echo $markers | jq .[$i].Key | sed -e 's/\"//g'`
        versionId=`echo $markers | jq .[$i].VersionId | sed -e 's/\"//g'`
        cmd="aws s3api delete-object --bucket $bucket --key \"$key\" --version-id $versionId"
        echo $cmd
        eval $cmd
    done
fi
The AWS CLI requires Python, and there's a much, much better way to do this using Python:
import boto3
session = boto3.session.Session()
s3 = session.resource(service_name='s3')
bucket = s3.Bucket('your_bucket_name')
bucket.object_versions.delete()
# bucket.delete()
If you want to use a named profile, it could be:
import boto3
session = boto3.session.Session(profile_name='your_profile_name')
s3 = session.resource(service_name='s3')
bucket = s3.Bucket('your_bucket_name')
## uncomment the line below to delete your bucket objects versions; BE CAREFUL!!!
# bucket.object_versions.delete()
## uncomment the line below to delete your bucket; BE CAREFUL!!!
# bucket.delete()
With the AWS CLI v2, by default all output is returned through a pager program (e.g. less). Cf. Output paging.
To disable it, run:
export AWS_PAGER=""
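If you'd rather not touch the environment, the v2 CLI also has a per-command flag and a persistent config setting (both are standard CLI options; the bucket name is a placeholder):
# one-off: disable the pager for a single command
aws s3api list-object-versions --bucket my-bucket --no-cli-pager
# persistent: store an empty pager in ~/.aws/config
aws configure set cli_pager ""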
#!/usr/bin/env bash
set -o errexit -o noclobber -o nounset -o pipefail
if [[ "$#" -eq 0 ]]
then
cat >&2 << 'EOF'
./clear-s3-buckets.bash BUCKET [BUCKET…]
Deletes *all* versions of *all* files in *all* given buckets. Only to be used in case of emergency!
EOF
exit 1
fi
read -n1 -p "THIS WILL DELETE EVERYTHING IN BUCKETS ${*}! Press Ctrl-c to cancel or anything else to continue: " -r
delete_objects() {
count="$(jq length <<< "$1")"
if [[ "$count" -eq 0 ]]
then
echo "No objects found; skipping" >&2
return
fi
echo "Removing objects"
for index in $(seq 0 $(("$count" - 1)))
do
key="$(jq --raw-output ".[${index}].Key" <<< "$1")"
version_id="$(jq --raw-output ".[${index}].VersionId" <<< "$1")"
delete_command=(aws s3api delete-object --bucket="$bucket" --key="$key" --version-id="$version_id")
printf '%q ' "${delete_command[@]}"
printf '\n'
"${delete_command[@]}"
done
}
for bucket
do
versions="$(aws s3api list-object-versions --bucket="$bucket" | jq .Versions)"
delete_objects "$versions"
markers="$(aws s3api list-object-versions --bucket="$bucket" | jq .DeleteMarkers)"
delete_objects "$markers"
done
Improvements:
- Passes shellcheck
- Idiomatic Bash
- Safety pragmas at the top
- Reuses loop code
- Uses More Quotes™
- Simplified commands by using jq's --raw-output
- Various ergonomics like a warning prompt, printing if no entries were found, escaping the command when printing it, and usage instructions
- Processes multiple buckets
Came up with this version, using headless commands and specifying region and profile:
https://gist.github.com/andy-b-84/9b9df3dc9ca8f7d50cd910b23cea5e0e
This gist was very useful.
This error occurs when the aws command's default output format is not json:
parse error: Invalid numeric literal at line 2, column 0
This has a very simple fix: wherever aws command output is passed to jq, have the script specify --output=json.
For instance:
versions=`aws s3api list-object-versions --bucket $bucket |jq '.Versions'`
becomes
versions=`aws --output=json s3api list-object-versions --bucket $bucket |jq '.Versions'`
@kayomarz I think that might be a setting on your side - I don't need --output=json.
@l0b0 Yes, my aws CLI is configured with output = table (so aws CLI output is no longer json), and this script results in parse error: Invalid numeric literal at line 2, column 0.
Using --output=json as mentioned above fixes the error.
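Another way to pin this for a whole script, without editing each command (the env var is a standard CLI setting):
# force json output for every aws call in this shell, regardless of ~/.aws/config
export AWS_DEFAULT_OUTPUT=json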
btw, you can use jq's -r flag to strip the quotation chars (") from query results instead of sed
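For example, in the original loop (a sketch using the same variables as the script above):
# -r emits raw strings, so the sed 's/\"//g' post-processing goes away
key=`echo $versions | jq -r .[$i].Key`
versionId=`echo $versions | jq -r .[$i].VersionId`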
for some bizarre reason, this line does not work for me:
version_id="$(jq --raw-output ".[${index}].VersionId" <<< "$1")"
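If anyone else hits that, one thing worth trying is passing the index in via jq's --argjson instead of interpolating it into the filter string (a sketch using the same variables as the script above):
# the index arrives as a jq variable, so shell quoting can't mangle the filter
version_id="$(jq --raw-output --argjson i "$index" '.[$i].VersionId' <<< "$1")"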
There is actually a much simpler and faster approach:
bucket=$1
fileToDelete=$2
deleteBefore=$3
fileName='aws_delete.json'
rm $fileName 2> /dev/null
versionsToDelete=`aws s3api list-object-versions --bucket "$bucket" --prefix "$fileToDelete" --query "Versions[?(LastModified<'$deleteBefore')].{Key: Key, VersionId: VersionId}"`
cat << EOF > $fileName
{"Objects":$versionsToDelete, "Quiet":true}
EOF
aws s3api delete-objects --bucket "$bucket" --delete file://$fileName
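Typical usage (a sketch; the script name and all three arguments are placeholders):
# delete versions of objects under a prefix last modified before a date
./delete-old-versions.sh my-bucket backups/ 2020-01-01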
s3api delete-objects can handle up to 1000 records per call.
Want to do more advanced stuff? Check out my gist.