Skip to content

Instantly share code, notes, and snippets.

@denisshevchenko
Created September 13, 2016 07:02
Show Gist options
  • Save denisshevchenko/3edcc28ebe9a028ebe8f5f6e9a0e0481 to your computer and use it in GitHub Desktop.
Save denisshevchenko/3edcc28ebe9a028ebe8f5f6e9a0e0481 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Strict mode, spelled out with long option names:
#   errexit  - stop on an unhandled non-zero exit status
#   noglob   - disable pathname expansion
#   nounset  - treat expansion of an unset variable as an error
#   pipefail - a pipeline fails when any of its stages fails
set -o errexit -o noglob -o nounset -o pipefail

# AWS CLI command prefixes. They are plain strings that the rest of the
# script expands unquoted so that they word-split into command + arguments.
list="aws s3 ls"
move="aws s3 mv"
#######################################
# Move known junk out of the S3 bucket "live-<name>" into its garbage/ prefix.
#
# Four clean-up passes are made, each announced on stdout:
#   1. the prefix named after the bucket itself (a nested duplicate);
#   2. top-level entries whose name consists of digits only, e.g. "123/";
#   3. the blank-named prefix (paths starting with "//");
#   4. entries under product/ named like <digit>-<10 digits>.jpg.
#
# A listing that fails or comes back empty is treated as "nothing to move"
# for that pass. Listings are split on single spaces, so keys that contain
# spaces are not supported.
#
# Globals:
#   list - command prefix used to list objects (read, expanded unquoted)
#   move - command prefix used to move objects (read, expanded unquoted)
# Arguments:
#   $1 - bucket name without the "live-" prefix (required)
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 after the last pass. Move failures are not checked here; if the caller
#   enabled `set -e`, a failed move aborts the script.
#######################################
function cleanS3Bucket {
  local rootName="live-${1:?usage: cleanS3Bucket <bucket-name-without-live-prefix>}"
  local rootPath="s3://$rootName"
  local garbage="$rootPath/garbage"
  # Regexes are kept in variables so that [[ =~ ]] treats them as patterns.
  local digitsOnlyPrefixRe='^[0-9]+/$'
  # The dot is escaped so that only a literal ".jpg" matches.
  local timestampedJpgRe='[0-9]-[0-9]{10}\.jpg'
  local listing entry

  echo "Check bucket $rootPath..."

  echo "STEP 1: Duplicate media-live -> garbage..."
  # Move only when the listing succeeded AND returned something; the skip
  # message must not be captured into the variable that is being tested.
  if listing=$($list --human-readable "$rootPath/$rootName") \
      && [[ -n "$listing" ]]; then
    $move "$rootPath/$rootName" "$garbage/$rootName" --recursive
  else
    echo "No duplicate media-live, skipped."
  fi

  echo "STEP 2: Files with digits-only prefix -> garbage..."
  listing=$($list --human-readable "$rootPath/") || listing=''
  # The listing is tokenised on spaces and filtered inside the loop. The
  # earlier `grep ... || true | while ...` form bound `|| true` to the whole
  # pipeline, so the loop never received grep's output.
  while IFS= read -r entry; do
    [[ "$entry" =~ $digitsOnlyPrefixRe ]] || continue
    # stdin is detached so the move command cannot consume the entry list.
    $move "$rootPath/$entry" "$garbage/$entry" --recursive < /dev/null
  done < <(tr ' ' '\n' <<< "$listing")

  # Same text as before, without the accidental quote break in the literal.
  echo 'STEP 3: Blank name prefix (empty path //) -> garbage...'
  if listing=$($list --human-readable "$rootPath//") \
      && [[ -n "$listing" ]]; then
    $move "$rootPath//" "$garbage//" --recursive
  else
    echo "No prefix with blank name, skipped."
  fi

  echo "STEP 4: Files with timestamps -> garbage..."
  listing=$($list --human-readable --recursive --page-size 500 \
    "$rootPath/product") || listing=''
  while IFS= read -r entry; do
    [[ "$entry" =~ $timestampedJpgRe ]] || continue
    $move "$rootPath/$entry" "$garbage/$entry" < /dev/null
  done < <(tr ' ' '\n' <<< "$listing")

  echo "It's done for the bucket $rootPath."
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment