Last active
October 14, 2019 15:13
-
-
Save jazzl0ver/29323d27781ae8cd1a6434e7268247a1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# Iterates over all available RDS instances in all regions and creates snapshots of them along with dumping (if necessary). Dump | |
# is encrypted if the database itself is encrypted at rest. Only dumping MySQL instances is implemented at the moment | |
# | |
# Usage: | |
# $ aws configure - configure AWS access and secret keys | |
# $ vi .rds_backup - setup environment variables required for the script (see below) | |
# $ source .rds_backup - export the environment variables | |
# $ ./rds_backup.sh [(makedump|anyotherword) [db-instance]] | |
# where: | |
# makedump - dump (export) the database data to a file; | |
# adds 'Dumped=YYYY-MM-DD-HH-MI' tag to the DB instance if dumping was successful | |
# anyotherword - no dumping | |
# db-instance - name of a single DB instance to back up (skips iterating over all instances)
# | |
# Following tags are used to manage the process: | |
# - Backup=yes - DB instances will not be backed up unless this tag is set | |
# - Dump=yes - dump the DB instance (a new db instance is launched and then deleted for the purpose of dumping); | |
# set DB_USER and DB_PASS environment variables - the DB user credentials | |
# the DB user must have appropriate privileges set, for example (for MySQL):
# GRANT SELECT, SHOW DATABASES, LOCK TABLES, SHOW VIEW, EVENT ON *.* TO 'dump'@'%' | |
# DAYS_DUMPS_RETENTION environment variable sets the number of days to keep the dumps in BACKUPPATH
# - DumpExcludeTables="db1.table1 db2.table2 ..." - Don't dump specified tables | |
# - Purgable=yes - snapshot is allowed to be purged after PurgeAfter days | |
# - PurgeAfter=N - purge the snapshot after N days; N is taken from the DAYS_SS_RETENTION environment variable
# | |
# DB's subnet group, VPC security and parameter groups are saved in the snapshot tags to simplify further restoration | |
# | |
# If a DB instance is encrypted at rest and Dump tag is set, the DB's dump will be encrypted with AES_256 cipher | |
# using the KeyId specified in KMS_DATA_KEYID environment variable. | |
# Set KMS_PASS_KEYID and DB_PASS_ENC to decrypt mysql user password before proceeding further. | |
# Make sure to create the Customer Master Keys before running the script and add the AWS IAM user to the Key's users list | |
# IMPORTANT: add your custom key-value pairs to the ENC_CONTEXT environment variable to make the encryption more secure | |
# | |
# Passwords were encrypted using this command: | |
# aws --region us-east-1 kms encrypt --key-id 'alias/keyid' --plaintext file://p --encryption-context 'context=context' --output json | |
# | |
# Command to decrypt the encrypted dumps: | |
# cat db-YYYY-MM-DD-HH-MI.sql.bz2.enc | KEY=$(cat db-YYYY-MM-DD-HH-MI.key | base64 --decode | aws kms decrypt --encryption-context "Key1=Value1,Key2=Value2,..." --ciphertext-blob fileb:///dev/stdin --query 'Plaintext' --output text | base64 --decode) openssl enc -aes-256-cbc -d -in /dev/stdin -pass env:KEY | bunzip2 -c > db-YYYY-MM-DD-HH-MI.sql | |
# where --encryption-context is the context that was used for data key generation | |
# | |
# Sample IAM policy for RDS backup user or instance role: | |
# { | |
# "Version": "2012-10-17", | |
# "Statement": [ | |
# { | |
# "Sid": "Stmt1474478969000", | |
# "Effect": "Allow", | |
# "Action": [ | |
# "rds:*" | |
# ], | |
# "Resource": [ | |
# "arn:aws:rds:*:*:db:mnl-*", | |
# "arn:aws:rds:*:*:db:ss-*" | |
# ] | |
# }, | |
# { | |
# "Sid": "Stmt1474478969001", | |
# "Effect": "Allow", | |
# "Action": [ | |
# "rds:Describe*", | |
# "rds:ListTagsForResource", | |
# "rds:AddTagsToResource", | |
# "ec2:DescribeAccountAttributes", | |
# "ec2:DescribeAvailabilityZones", | |
# "ec2:DescribeSecurityGroups", | |
# "ec2:DescribeVpcs", | |
# "iam:GetUser" | |
# ], | |
# "Resource": "*" | |
# }, | |
# { | |
# "Sid": "Stmt1474478969002", | |
# "Effect": "Allow", | |
# "Action": [ | |
# "rds:ModifyDBInstance" | |
# ], | |
# "Resource": [ | |
# "arn:aws:rds:*:*:pg:*", | |
# "arn:aws:rds:*:*:secgrp:*", | |
# "arn:aws:rds:*:*:og:*" | |
# ] | |
# }, | |
# { | |
# "Sid": "VisualEditor1", | |
# "Effect": "Allow", | |
# "Action": "kms:Decrypt", | |
# "Resource": "arn:aws:kms:*:*:key/..." | |
# }, | |
# { | |
# "Sid": "VisualEditor2", | |
# "Effect": "Allow", | |
# "Action": "kms:GenerateDataKey", | |
# "Resource": "arn:aws:kms:*:*:key/..." | |
# } | |
# ] | |
# } | |
# | |
# | |
# (C) 2019, jazzl0ver | |
# | |
#-- Region(s) to process, space separated. Leave REGIONS unset to get the default below;
#-- export REGIONS="" (empty) to make the script iterate through all available regions.
#-- NOTE: "=" (not ":=") is used on purpose so that an explicitly empty REGIONS stays empty.
regions=${REGIONS="us-east-1"}
#-- Snapshot name prefix; make sure to specify it in the IAM policy
PREFIX=${PREFIX:=mnl}
#-- DB instance name prefix used for dumping; make sure to specify it in the IAM policy
DUMPDBPREFIX=${DUMPDBPREFIX:=ss}
#-- MySQL user with backup privilege
#-- NOTE: this overwrites the shell's own USER variable for the rest of the script
USER=${DB_USER:=backup}
PASS=${DB_PASS:=pass}
#-- if set, the password will be decrypted at first
ENC_PASS=${DB_PASS_ENC}
#-- AWS KMS Customer Master Key (ID, alias or ARN are allowed) to decrypt ENC_PASS
PASS_KEYID=${KMS_PASS_KEYID:="alias/mybackupkey"}
#-- AWS KMS Customer Master Key (ID, alias or ARN are allowed) for data encryption
DATA_KEYID=${KMS_DATA_KEYID:="alias/mybackupkey"}
#-- Encryption context for stronger encryption (Filename=/path/to/backup pair is automatically added in the end before dumping)
ENC_CONTEXT=${ENC_CONTEXT:="Name=mysecrets"}
#-- Where to store dumps and encryption keys
BACKUPPATH=${BACKUPPATH:=/vol/db-dump}
#-- Days to keep snapshots (0 means to delete right after dumping)
DAYS_SS_RETENTION=${DAYS_SS_RETENTION:=0}
#-- Days to keep dumps
DAYS_DUMPS_RETENTION=${DAYS_DUMPS_RETENTION:=1}
#-- Email to send alerts to
ADMIN_EMAIL=${ADMIN_EMAIL:=admin@localhost}

#-- decrypt the DB password with AWS KMS when an encrypted one is supplied
if [ -n "$ENC_PASS" ]; then
  PASS=$(printf '%s\n' "$ENC_PASS" \
    | base64 --decode \
    | aws --region us-east-1 kms decrypt --encryption-context "context=$PASS_KEYID" --ciphertext-blob fileb:///dev/stdin --query Plaintext --output text \
    | base64 --decode)
  #-- snapshots do not need the password, so only warn instead of aborting
  if [ -z "$PASS" ]; then
    echo "$(date) *** WARNING: can't decrypt DB_PASS_ENC, the DB password is empty; dumping will fail." >&2
  fi
fi

#-- "makedump" magic key to dump databases tagged with "Dump=yes"; use any other word/char to skip dumping
DODUMP=$1
#-- backup a single DB instance specified in the 2nd argument (DB instance must be tagged with Backup=yes)
DBNAME=$2
#-- UTC timestamp used in snapshot, dump instance and dump file names
date=$(date -u +%Y-%m-%d-%H-%M)
#-- Polls a shell command until its output is something other than "0".
#-- Arguments: the command line to evaluate (one string or several words; they are
#--            joined with spaces and passed to eval, so pipes and quotes are honoured)
#-- Behaviour: spaces are stripped from the output before the comparison; while the
#--            output is exactly "0" the function sleeps 60 seconds and retries.
#-- NOTE: the command is eval'ed - only pass trusted, script-built command strings.
wait_until() {
  local result
  while :; do
    #-- "$*" (quoted) keeps the words from being glob-expanded before eval sees them
    result=$(eval "$*" | sed 's/ //g')
    [[ $result == 0 ]] || break
    sleep 60
  done
  return 0
}
#-- Polls a shell command until its output is exactly "0".
#-- Arguments: the command line to evaluate (one string or several words; they are
#--            joined with spaces and passed to eval, so pipes and quotes are honoured)
#-- Behaviour: spaces are stripped from the output before the comparison; while the
#--            output differs from "0" the function sleeps 60 seconds and retries.
#-- NOTE: the command is eval'ed - only pass trusted, script-built command strings.
wait_till() {
  local result
  while :; do
    #-- "$*" (quoted) keeps the words from being glob-expanded before eval sees them
    result=$(eval "$*" | sed 's/ //g')
    [[ $result != 0 ]] || break
    sleep 60
  done
  return 0
}
#-- Pauses while the system is overloaded.
#-- Reads the 5-minute load average from `uptime`; while its integer part is greater
#-- than 3 the function prints a message and sleeps for 5 minutes before re-checking.
#-- The value is located relative to the "load average:" label rather than by comma
#-- position, because the number of comma-separated fields in the uptime output
#-- changes with the uptime itself (e.g. "up 3:42," vs "up 10 days, 3:42,").
#-- An unreadable value is treated as 0 (no waiting).
wait_la() {
  local la_5min
  while :; do
    #-- LC_ALL=C keeps "." as the decimal separator; int() drops the fraction
    la_5min=$(LC_ALL=C uptime \
      | awk -F'load averages?: *' '{ split($2, la, /[ ,]+/); print int(la[2]) }')
    [[ $la_5min =~ ^[0-9]+$ ]] || la_5min=0
    (( la_5min > 3 )) || break
    echo "$(date) *** System is overloaded, waiting for 5 minutes before continue..."
    sleep 300
  done
  return 0
}
#-- Dumps a database by restoring its snapshot into a temporary DB instance.
#-- Arguments:
#--   $1 - snapshot identifier to restore from
#--   $2 - identifier of the original DB instance
#--   $3 - DB subnet group name for the temporary instance
#--   $4 - VPC security group id(s), space separated
#--   $5 - DB parameter group name
#-- Globals read: region, ARN (of the original DB instance), accountid, date, LOCK,
#--   DUMPDBPREFIX, BACKUPPATH, USER, PASS, DATA_KEYID, ENC_CONTEXT,
#--   DAYS_DUMPS_RETENTION, DUMP_AZ (optional; defaults to "<region>a")
#-- Returns: 0 if the dump was written, 1 if it was not (unsupported engine, data key
#--   or dump pipeline failure). Exits the script with 255 on missing arguments or
#--   when the temporary instance can't be launched, modified or rebooted.
#-- The "Dumped" tags are only added after a successful dump. The temporary instance
#-- is deleted in either case.
make_dump() {
  local snapid=$1
  local db=$2
  local SUBNETGROUP=$3
  local VPCSECGROUP=$4
  local PARAMSGROUP=$5

  if [[ -z "$snapid" || -z "$db" || -z "$SUBNETGROUP" || -z "$VPCSECGROUP" || -z "$PARAMSGROUP" ]]; then
    echo "$(date) *** Can't extract required tags. Dumping failed, exiting.."
    exit 255
  fi

  #-- db instance engine
  local ENGINE
  ENGINE=$(aws --region "$region" rds describe-db-instances --db-instance-identifier "$db" --query 'DBInstances[].Engine' --output text)

  #-- db instance that will be used for dumping
  local dumpdb="$DUMPDBPREFIX-$db-$date"

  #-- Copy GROUP tag to the new db instance
  local GROUP
  local -a group_tag=()
  GROUP=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Group'].Value" --output text)
  if [[ -n "$GROUP" ]]; then
    group_tag=(--tags "Key=Group,Value=${GROUP}")
  fi

  #-- Get list of tables to exclude from the dump and prepare it for mysqldump
  local DUMPEXCLUDETABLES table
  local -a exclude_tables=() exclude_args=()
  DUMPEXCLUDETABLES=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='DumpExcludeTables'].Value" --output text)
  read -r -a exclude_tables <<< "$DUMPEXCLUDETABLES"
  for table in "${exclude_tables[@]}"; do
    exclude_args+=(--ignore-table "$table")
  done

  #-- check if the db instance is encrypted at rest
  local ENCRYPTED class
  ENCRYPTED=$(aws --region "$region" rds describe-db-instances --db-instance-identifier "$db" --query 'DBInstances[].StorageEncrypted' --output text)
  if [[ "$ENCRYPTED" == "True" ]]; then
    #-- db.m3.medium is the minimal class supported for encrypted DBs
    class=db.m3.medium
  else
    class=db.t3.small
  fi

  echo "$(date) *** Launching $dumpdb from snapshot $snapid..."
  #-- launch new db instance from the snapshot; the availability zone follows the
  #-- region being processed (was hard-coded to us-east-1a) and can be overridden
  #-- with the DUMP_AZ environment variable
  aws --region "$region" rds restore-db-instance-from-db-snapshot \
    --db-instance-identifier "$dumpdb" --db-snapshot-identifier "$snapid" \
    --availability-zone "${DUMP_AZ:-${region}a}" --db-instance-class "$class" \
    --db-subnet-group-name "$SUBNETGROUP" \
    --no-auto-minor-version-upgrade "${group_tag[@]}" || exit 255

  local INSTANCE_AVAILABILITY="aws --region $region rds describe-db-instances --db-instance-identifier $dumpdb --query 'DBInstances[].DBInstanceStatus' --output text | grep available | wc -l"
  wait_until "$INSTANCE_AVAILABILITY"

  echo "$(date) *** Applying parameters to $dumpdb..."
  #-- the security group list is deliberately split into separate arguments
  local -a vpc_sec_groups=()
  read -r -a vpc_sec_groups <<< "$VPCSECGROUP"
  aws --region "$region" rds modify-db-instance --db-instance-identifier "$dumpdb" --backup-retention-period 0 \
    --db-parameter-group-name "$PARAMSGROUP" --vpc-security-group-ids "${vpc_sec_groups[@]}" || exit 255

  local PARAM_CHANGES="aws --region $region rds describe-db-instances --db-instance-identifier $dumpdb --query 'DBInstances[].DBParameterGroups[].ParameterApplyStatus' --output text | grep -E 'pending-reboot|available' |wc -l"
  wait_until "$PARAM_CHANGES"
  wait_until "$INSTANCE_AVAILABILITY"

  echo "$(date) *** Rebooting $dumpdb..."
  if ! aws --region "$region" rds reboot-db-instance --db-instance-identifier "$dumpdb"; then
    echo "$(date) *** Can't reboot $dumpdb. Trying one more time..."
    aws --region "$region" rds describe-db-instances --db-instance-identifier "$dumpdb"
    echo "$(date) *** Rebooting $dumpdb one more time..."
    if ! aws --region "$region" rds reboot-db-instance --db-instance-identifier "$dumpdb"; then
      echo "$(date) *** Can't reboot 2nd time $dumpdb. Manual intervention required, exiting..."
      aws --region "$region" rds describe-db-instances --db-instance-identifier "$dumpdb"
      /bin/rm "$LOCK"
      exit 255
    fi
  fi
  wait_until "$INSTANCE_AVAILABILITY"

  #-- get the endpoint address of the new db instance
  local ENDPOINT
  ENDPOINT=$(aws --region "$region" rds describe-db-instances --db-instance-identifier "$dumpdb" --query 'DBInstances[].Endpoint.Address' --output text)

  #-- dump_rc stays 0 only when a dump file was completely written
  local dump_rc=0 dump_file="" rc
  local -a pipe_rc=()

  if [[ "$ENGINE" != "mysql" ]]; then
    #-- only MySQL dumping is implemented; don't pretend the instance was dumped
    echo "$(date) *** Engine '$ENGINE' is not supported for dumping, $dumpdb was not dumped."
    dump_rc=1
  elif [[ "$ENCRYPTED" == "True" ]]; then
    #-- encrypt the dump if the db instance is encrypted at rest
    dump_file="$BACKUPPATH/$db-$date.sql.bz2.enc"
    local CONTEXT="$ENC_CONTEXT,Filename=$BACKUPPATH/$db-$date.sql.bz2"
    #-- generate a key for the dump encryption; JSON output is forced because jq parses it
    local -A datakey=()
    local key value
    while IFS=":" read -r key value; do
      [[ -n "$key" ]] && datakey[$key]="$value"
    done < <(aws kms generate-data-key --key-id "$DATA_KEYID" --encryption-context "$CONTEXT" --key-spec AES_256 --output json \
      | jq -r "to_entries|map(\"\(.key):\(.value|tostring)\")|.[]")

    if [[ -z "${datakey[Plaintext]}" || -z "${datakey[CiphertextBlob]}" ]]; then
      #-- never encrypt with an empty passphrase
      echo "$(date) *** Can't generate a data key with $DATA_KEYID, $dumpdb was not dumped."
      dump_rc=1
    else
      #-- plaintext key is used for encryption and then deleted from memory.
      #-- NOTE(review): the decoded key is binary and a shell variable can't hold NUL
      #-- bytes; kept as is because the documented decrypt command derives KEY the same way.
      KEY=$(echo "${datakey[Plaintext]}" | base64 --decode)
      export KEY
      #-- dump, encrypt and compress
      echo "$(date) *** Dumping and encrypting $dumpdb, using $ENDPOINT as an endpoint..."
      export MYSQL_PWD=$PASS
      /usr/bin/mysqldump --opt --events -h "$ENDPOINT" -u "$USER" -A "${exclude_args[@]}" \
        | bzip2 \
        | /usr/bin/openssl enc -aes-256-cbc -salt -in /dev/stdin -out /dev/stdout -pass env:KEY > "$dump_file"
      pipe_rc=("${PIPESTATUS[@]}")
      #-- store the encrypted encryption key along with the dump; use AWS KMS to decrypt it when needed
      echo "${datakey[CiphertextBlob]}" > "$BACKUPPATH/$db-$date.key"
      unset KEY
    fi
  else
    #-- no encryption needed - just dump and compress
    dump_file="$BACKUPPATH/$db-$date.sql.bz2"
    echo "$(date) *** Dumping $dumpdb, using $ENDPOINT as an endpoint..."
    export MYSQL_PWD=$PASS
    /usr/bin/mysqldump --opt --events -h "$ENDPOINT" -u "$USER" -A "${exclude_args[@]}" \
      | bzip2 > "$dump_file"
    pipe_rc=("${PIPESTATUS[@]}")
  fi
  #-- don't leave the DB password in the environment of the following commands
  unset MYSQL_PWD

  #-- any failing stage of the dump pipeline makes the dump unusable
  for rc in "${pipe_rc[@]}"; do
    (( rc == 0 )) || dump_rc=1
  done
  if (( dump_rc != 0 )) && [[ -n "$dump_file" && -e "$dump_file" ]]; then
    mv -f -- "$dump_file" "$dump_file.incomplete"
    echo "$(date) *** Dumping $dumpdb failed (exit codes: ${pipe_rc[*]}); partial output kept as $dump_file.incomplete"
  fi

  if (( dump_rc == 0 )); then
    #-- tag the original db instance
    echo "$(date) *** Tagging the original db $ARN..."
    aws --region "$region" rds add-tags-to-resource --resource-name "$ARN" --tags "Key=Dumped,Value=$date"
    #-- tag the snapshot of the original db instance
    echo "$(date) *** Tagging original snaphost $snapid..."
    aws --region "$region" rds add-tags-to-resource --resource-name "arn:aws:rds:$region:$accountid:snapshot:$snapid" --tags "Key=Dumped,Value=$date"
  fi

  #-- delete the db instance used for dumping (also after a failed dump)
  echo "$(date) *** Deleting $dumpdb..."
  aws --region "$region" rds delete-db-instance --db-instance-identifier "$dumpdb" --skip-final-snapshot

  #-- save db configuration and tags
  aws --region "$region" rds describe-db-instances --db-instance-identifier "$db" --output json > "$BACKUPPATH/$db-$date.txt"
  aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --output json >> "$BACKUPPATH/$db-$date.txt"

  #-- clean up old dumps (removes every regular file older than the retention in BACKUPPATH)
  echo "$(date) *** Cleaning up old dumps..."
  find "$BACKUPPATH" -type f -mtime +"$DAYS_DUMPS_RETENTION" -delete

  return "$dump_rc"
}
#-- Main flow: take the lock, snapshot every DB instance tagged Backup=yes in every
#-- region (optionally dumping it), then purge old snapshots tagged Purgable=yes.

#-- alert if we're already running (or a previous run failed and left its lock behind)
LOCK=/tmp/.$(basename "$0").$DBNAME
if [ -e "$LOCK" ]; then
  echo "$(date) *** Something went wrong: $(basename "$0") is already running. Manual interaction needed." | mail -s "rds_backup failed" "$ADMIN_EMAIL"
  #-- remove the lock only when it is stale; deleting the lock of a live run would
  #-- let the next invocation start a concurrent backup
  lock_pid=$(cat "$LOCK" 2>/dev/null)
  if ! { [[ "$lock_pid" =~ ^[0-9]+$ ]] && kill -0 "$lock_pid" 2>/dev/null; }; then
    /bin/rm -f -- "$LOCK"
  fi
  exit 1
fi
echo $$ > "$LOCK"

#-- get AWS account ID
accountid=$(aws sts get-caller-identity --output text --query 'Account')
[ -z "$accountid" ] && exit 255

#-- no regions configured: iterate through all regions available to the account
#-- (requires the ec2:DescribeRegions permission)
if [ -z "$regions" ]; then
  regions=$(aws --region us-east-1 ec2 describe-regions --query 'Regions[].RegionName' --output text)
fi

for region in $regions; do
  for db in $(aws --region "$region" rds describe-db-instances --query "DBInstances[].DBInstanceIdentifier" --output text); do
    #-- skip other db instances than the one specified in the command line
    [[ -z "$DBNAME" || "$db" == "$DBNAME" ]] || continue
    ARN="arn:aws:rds:$region:$accountid:db:$db"

    #-- don't backup a db instance unless it's tagged with "Backup=yes"
    aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Backup'].Value" --output text \
      | grep -qi yes || continue
    echo "$(date) *** Backing up $db in $region..."

    #-- check if the db instance is tagged for dumping (Dump=yes); DUMP is the number of matching lines
    DUMP=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Dump'].Value" --output text | grep -ci yes)

    #-- wait until the db instance is ready for backup
    INSTANCE_AVAILABILITY="aws --region $region rds describe-db-instances --db-instance-identifier $db --query 'DBInstances[].DBInstanceStatus' --output text | grep -E 'available|stopped' | wc -l"
    wait_until "$INSTANCE_AVAILABILITY"

    SNAPID="$PREFIX-$db-$date"

    #-- save DB's subnet group, VPC security group and parameters group in the snapshot tags
    tmp=$(aws --region "$region" rds describe-db-instances --db-instance-identifier "$db" --output json)
    SUBNETGROUP=$(/usr/bin/jq -r '.DBInstances[].DBSubnetGroup.DBSubnetGroupName' <<< "$tmp")
    VPCSECGROUP=$(/usr/bin/jq -r '.DBInstances[].VpcSecurityGroups[].VpcSecurityGroupId' <<< "$tmp" | xargs)
    PARAMSGROUP=$(/usr/bin/jq -r '.DBInstances[].DBParameterGroups[].DBParameterGroupName' <<< "$tmp")
    #-- GROUP - is a special tag for grouping resources in AWS billing, non-mandatory
    GROUP=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Group'].Value" --output text)

    #-- create db instance snapshot with the Dump tag (if exists)
    snapshot_tags=("Key=Purgable,Value=yes")
    if [[ "$DUMP" == "1" ]]; then
      snapshot_tags+=("Key=Dump,Value=yes")
    fi
    #-- the single quotes around the security group list are part of the AWS CLI
    #-- shorthand syntax (the value contains spaces)
    snapshot_tags+=(
      "Key=PurgeAfter,Value=$DAYS_SS_RETENTION"
      "Key=DBSubnetGroupName,Value=$SUBNETGROUP"
      "Key=DBParameterGroupName,Value=$PARAMSGROUP"
      "Key=VpcSecurityGroupId,Value='$VPCSECGROUP'"
      "Key=Group,Value=$GROUP"
    )
    echo "$(date) *** Creating snapshot $SNAPID of $db..."
    aws --region "$region" rds create-db-snapshot --db-instance-identifier "$db" --db-snapshot-identifier "$SNAPID" \
      --tags "${snapshot_tags[@]}" || exit 255

    SNAPSHOT_AVAILABILITY="aws --region $region rds describe-db-snapshots --db-snapshot-identifier $SNAPID --query 'DBSnapshots[].Status' --output text | grep available | wc -l"
    wait_until "$SNAPSHOT_AVAILABILITY"

    #-- launch the dumping procedure if the db instance is tagged with "Dump=yes"
    #-- and "makedump" keyword is specified in the command line
    if [[ "$DODUMP" == "makedump" && "$DUMP" == "1" ]]; then
      wait_la
      echo "$(date) *** Dumping $SNAPID of $db..."
      make_dump "$SNAPID" "$db" "$SUBNETGROUP" "$VPCSECGROUP" "$PARAMSGROUP"
    fi
  done

  #-- clean up old snapshots
  echo "$(date) *** Cleaning up..."
  #-- retrieve all manual RDS snapshots
  for snapid in $(aws --region "$region" rds describe-db-snapshots --snapshot-type manual --query "DBSnapshots[].DBSnapshotIdentifier" --output text); do
    ARN="arn:aws:rds:$region:$accountid:snapshot:$snapid"

    #-- delete old snapshots only if there's a tag "Purgable=yes"
    aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Purgable'].Value" --output text \
      | grep -qi yes || continue

    #-- retrieve how many days to keep the snapshot for from the snapshot's tag "PurgeAfter"
    PURGEAFTER=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='PurgeAfter'].Value" --output text)
    if ! [[ "$PURGEAFTER" =~ ^[0-9]+$ ]]; then
      echo "$(date) *** Snapshot $snapid has no valid PurgeAfter tag, skipping..."
      continue
    fi

    #-- get snapshot's creation time and extract the date
    snap_date=$(aws --region "$region" rds describe-db-snapshots --db-snapshot-identifier "$snapid" --query "DBSnapshots[].SnapshotCreateTime" --output text | cut -f1 -dT)
    snap_epoch=$(date -ud "$snap_date" +'%s' 2>/dev/null)
    if ! [[ "$snap_epoch" =~ ^[0-9]+$ ]]; then
      echo "$(date) *** Can't parse creation date '$snap_date' of $snapid, skipping..."
      continue
    fi

    #-- calculate days amount since snapshot creation till now
    days_since=$(( ( $(date -u +'%s') - snap_epoch ) / 60 / 60 / 24 ))

    #-- purge the snapshot if it's old enough (10# avoids octal parsing of values like "08")
    if (( days_since >= 10#$PURGEAFTER )); then
      DUMP=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Dump'].Value" --output text | grep -ci yes)
      if [[ "$DUMP" == "1" ]]; then
        #-- calculate db instance name used for dumping
        dumpdb=$(echo "$snapid" | sed -e "s/^${PREFIX}/${DUMPDBPREFIX}/")
        #-- we can't delete a snapshot while the db instance restored from it for dumping still exists
        INSTANCE_DELETED="aws --region $region rds describe-db-instances --query 'DBInstances[].DBInstanceIdentifier' --output text | grep $dumpdb | wc -l"
        wait_till "$INSTANCE_DELETED"
      fi
      echo "$(date) *** Deleting $snapid..."
      aws --region "$region" rds delete-db-snapshot --db-snapshot-identifier "$snapid"
    fi
  done
done

/bin/rm -f -- "$LOCK"
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment