Skip to content

Instantly share code, notes, and snippets.

@jazzl0ver
Last active October 14, 2019 15:13
Show Gist options
  • Save jazzl0ver/29323d27781ae8cd1a6434e7268247a1 to your computer and use it in GitHub Desktop.
Save jazzl0ver/29323d27781ae8cd1a6434e7268247a1 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# Iterates over all available RDS instances in all regions and creates snapshots of them along with dumping (if necessary). Dump
# is encrypted if the database itself is encrypted at rest. Only dumping MySQL instances is implemented at the moment
#
# Usage:
# $ aws configure - configure AWS access and secret keys
# $ vi .rds_backup - setup environment variables required for the script (see below)
# $ source .rds_backup - export the environment variables
# $ ./rds_backup.sh [(makedump|anyotherword) [db-instance]]
# where:
# makedump - dump (export) the database data to a file;
# adds 'Dumped=YYYY-MM-DD-HH-MI' tag to the DB instance if dumping was successful
# anyotherword - no dumping
# db-instance - DB instance name to backup to skip iterating over all instances
#
# Following tags are used to manage the process:
# - Backup=yes - DB instances will not be backed up unless this tag is set
# - Dump=yes - dump the DB instance (a new db instance is launched and then deleted for the purpose of dumping);
# set DB_USER and DB_PASS environment variables - the DB user credentials
# the DB user must have appropriate privileges set, for example (for MySQL):
# GRANT SELECT, SHOW DATABASES, LOCK TABLES, SHOW VIEW, EVENT ON *.* TO 'dump'@'%'
# DAYS_DUMPS_RETENTION environment variable sets how many days the dumps are kept in BACKUPPATH
# - DumpExcludeTables="db1.table1 db2.table2 ..." - Don't dump specified tables
# - Purgable=yes - snapshot is allowed to be purged after PurgeAfter days
# - PurgeAfter=N - purge the snapshot after N days; N is taken from the DAYS_SS_RETENTION environment variable
#
# DB's subnet group, VPC security and parameter groups are saved in the snapshot tags to simplify further restoration
#
# If a DB instance is encrypted at rest and Dump tag is set, the DB's dump will be encrypted with AES_256 cipher
# using the KeyId specified in KMS_DATA_KEYID environment variable.
# Set KMS_PASS_KEYID and DB_PASS_ENC to decrypt mysql user password before proceeding further.
# Make sure to create the Customer Master Keys before running the script and add the AWS IAM user to the Key's users list
# IMPORTANT: add your custom key-value pairs to the ENC_CONTEXT environment variable to make the encryption more secure
#
# Passwords were encrypted using this command:
# aws --region us-east-1 kms encrypt --key-id 'alias/keyid' --plaintext file://p --encryption-context 'context=context' --output json
#
# Command to decrypt the encrypted dumps:
# cat db-YYYY-MM-DD-HH-MI.sql.bz2.enc | KEY=$(cat db-YYYY-MM-DD-HH-MI.key | base64 --decode | aws kms decrypt --encryption-context "Key1=Value1,Key2=Value2,..." --ciphertext-blob fileb:///dev/stdin --query 'Plaintext' --output text | base64 --decode) openssl enc -aes-256-cbc -d -in /dev/stdin -pass env:KEY | bunzip2 -c > db-YYYY-MM-DD-HH-MI.sql
# where --encryption-context is the context that was used for data key generation
#
# Sample IAM policy for RDS backup user or instance role:
# {
# "Version": "2012-10-17",
# "Statement": [
# {
# "Sid": "Stmt1474478969000",
# "Effect": "Allow",
# "Action": [
# "rds:*"
# ],
# "Resource": [
# "arn:aws:rds:*:*:db:mnl-*",
# "arn:aws:rds:*:*:db:ss-*"
# ]
# },
# {
# "Sid": "Stmt1474478969001",
# "Effect": "Allow",
# "Action": [
# "rds:Describe*",
# "rds:ListTagsForResource",
# "rds:AddTagsToResource",
# "ec2:DescribeAccountAttributes",
# "ec2:DescribeAvailabilityZones",
# "ec2:DescribeSecurityGroups",
# "ec2:DescribeVpcs",
# "iam:GetUser"
# ],
# "Resource": "*"
# },
# {
# "Sid": "Stmt1474478969002",
# "Effect": "Allow",
# "Action": [
# "rds:ModifyDBInstance"
# ],
# "Resource": [
# "arn:aws:rds:*:*:pg:*",
# "arn:aws:rds:*:*:secgrp:*",
# "arn:aws:rds:*:*:og:*"
# ]
# },
# {
# "Sid": "VisualEditor1",
# "Effect": "Allow",
# "Action": "kms:Decrypt",
# "Resource": "arn:aws:kms:*:*:key/..."
# },
# {
# "Sid": "VisualEditor2",
# "Effect": "Allow",
# "Action": "kms:GenerateDataKey",
# "Resource": "arn:aws:kms:*:*:key/..."
# }
# ]
# }
#
#
# (C) 2019, jazzl0ver
#
#-- set region(s) manually (space separated); if empty the script will iterate thru all available regions
#-- NOTE(review): because of ":=", an unset or empty REGIONS environment variable becomes "us-east-1" here,
#-- so the value can only be empty if this line itself is edited
regions=${REGIONS:="us-east-1"}
#-- Snapshot name prefix; make sure to specify it in the IAM policy
PREFIX=${PREFIX:=mnl}
#-- DB instance name prefix used for dumping; make sure to specify it in the IAM policy
DUMPDBPREFIX=${DUMPDBPREFIX:=ss}
#-- MySQL user with backup privilege
#-- NOTE(review): this overwrites the shell's own USER variable for the rest of the script
USER=${DB_USER:=backup}
PASS=${DB_PASS:=pass}
#-- if set, the password will be decrypted at first
ENC_PASS=${DB_PASS_ENC}
#-- AWS KMS Customer Master Key (ID, alias or ARN are allowed) to decrypt ENC_PASS
PASS_KEYID=${KMS_PASS_KEYID:="alias/mybackupkey"}
#-- AWS KMS Customer Master Key (ID, alias or ARN are allowed) for data encryption
DATA_KEYID=${KMS_DATA_KEYID:="alias/mybackupkey"}
#-- Encryption context for stronger encryption (Filename=/path/to/backup pair is automatically added in the end before dumping)
ENC_CONTEXT=${ENC_CONTEXT:="Name=mysecrets"}
#-- Where to store dumps and encryption keys
BACKUPPATH=${BACKUPPATH:=/vol/db-dump}
#-- Days to keep snapshots (0 means to delete right after dumping)
DAYS_SS_RETENTION=${DAYS_SS_RETENTION:=0}
#-- Days to keep dumps
DAYS_DUMPS_RETENTION=${DAYS_DUMPS_RETENTION:=1}
#-- Email to send alerts to
ADMIN_EMAIL=${ADMIN_EMAIL:=admin@localhost}

#-- Decrypts ENC_PASS (base64 encoded KMS ciphertext) and stores the plaintext in PASS.
#-- Exits with 255 if nothing could be decrypted: continuing with an empty password would only
#-- fail much later, after a temporary DB instance has already been launched.
#-- NOTE(review): the encryption context used here is "context=$PASS_KEYID", while the sample
#-- encrypt command in the file header uses 'context=context' - confirm they agree for your keys
_decrypt_db_pass()
{
    local plain
    plain=$(printf '%s\n' "$ENC_PASS" \
        | base64 --decode \
        | aws --region us-east-1 kms decrypt --encryption-context "context=$PASS_KEYID" --ciphertext-blob fileb:///dev/stdin --query Plaintext --output text \
        | base64 --decode)
    if [[ -z "$plain" ]]; then
        echo "$(date) *** Can't decrypt the DB password (DB_PASS_ENC), exiting.."
        exit 255
    fi
    PASS=$plain
}

if [[ -n "$ENC_PASS" ]]; then
    _decrypt_db_pass
fi
#-- "makedump" magic key to dump databases tagged with "Dump=yes"; use any other word/char to skip dumping
DODUMP=$1
#-- backup a single DB instance specified in the 2nd argument (DB instance must be tagged with Backup=yes)
DBNAME=$2
#-- UTC timestamp (YYYY-MM-DD-HH-MI) used in snapshot, instance and dump file names
date=$(date --utc +%Y-%m-%d-%H-%M)
#-- Polls a shell expression once a minute until its output is non-zero.
#-- Arguments: the expression to evaluate; all arguments are joined with spaces and handed to
#--            eval, so pipes and quotes inside the expression are honoured
#-- Spaces are stripped from the output before it is compared with "0"; any other output
#-- ends the wait.
wait_until()
{
    local output
    #-- a plain loop instead of recursion: long waits don't grow the call stack
    while :; do
        output=$(eval "$*" | sed 's/ //g')
        [[ "$output" == "0" ]] || break
        sleep 60
    done
}
#-- Polls a shell expression once a minute until its output is zero.
#-- Arguments: the expression to evaluate; all arguments are joined with spaces and handed to
#--            eval, so pipes and quotes inside the expression are honoured
#-- Spaces are stripped from the output before it is compared with "0"; the wait goes on
#-- for as long as the output is anything else.
wait_till()
{
    local output
    #-- a plain loop instead of recursion: long waits don't grow the call stack
    while :; do
        output=$(eval "$*" | sed 's/ //g')
        [[ "$output" != "0" ]] || break
        sleep 60
    done
}
#-- Prints the integer part of the 5-minute load average taken from uptime, or 0 if it can't be parsed.
#-- The value is located relative to the "load average:" label rather than by comma position,
#-- because the number of commas before the label depends on how long the host has been up.
_wait_la_load5()
{
    local out
    local re='load averages?: *[0-9.]+[, ]+([0-9]+)'
    out=$(LC_ALL=C uptime)
    if [[ "$out" =~ $re ]]; then
        echo "${BASH_REMATCH[1]}"
    else
        echo 0
    fi
}

#-- pause running while the system is overloaded
wait_la()
{
    local la_5min
    #-- if load average for last 5 mins more than 3, wait for 5 minutes before continue
    la_5min=$(_wait_la_load5)
    while (( la_5min > 3 )); do
        echo "$(date) *** System is overloaded, waiting for 5 minutes before continue..."
        sleep 300
        la_5min=$(_wait_la_load5)
    done
}
#-- Dump the database: restore the snapshot into a temporary DB instance, dump it (MySQL only),
#-- tag the source DB instance and the snapshot with Dumped=<date> if the dump succeeded, then
#-- delete the temporary instance and purge old dump files.
#-- Arguments:
#--   $1 - identifier of the snapshot to restore from
#--   $2 - identifier of the source DB instance
#--   $3 - DB subnet group name for the temporary instance
#--   $4 - VPC security group id(s), space separated
#--   $5 - DB parameter group name
#-- Globals read: region, ARN (of the source DB instance), accountid, date, LOCK, USER, PASS,
#--   DUMPDBPREFIX, DATA_KEYID, ENC_CONTEXT, BACKUPPATH, DAYS_DUMPS_RETENTION and the optional
#--   DUMP_AZ (availability zone for the temporary instance; defaults to the zone of the source
#--   instance instead of a hard-coded us-east-1a, which is only valid in one region)
#-- Exits with 255 if arguments are missing or the temporary instance can't be launched, modified
#-- or rebooted. Returns 0 if the dump was made and tagged, 1 otherwise.
make_dump()
{
    local snapid=$1
    local db=$2
    local SUBNETGROUP=$3
    local VPCSECGROUP=$4
    local PARAMSGROUP=$5
    local ENGINE dumpdb GROUP DUMPEXCLUDETABLES ENCRYPTED class az ENDPOINT CONTEXT
    local INSTANCE_AVAILABILITY PARAM_CHANGES
    local table key value rc dump_ok=0
    local -a group_tag=() tables=() exclude_tables=() az_opt=() vpc_sec_groups=() pipe_rc=()
    local -A datakey=()

    if [[ -z "$snapid" || -z "$db" ]]; then
        echo "$(date) *** Can't extract required tags. Dumping failed, exiting.."
        exit 255
    fi
    if [[ -z "$SUBNETGROUP" || -z "$VPCSECGROUP" || -z "$PARAMSGROUP" ]]; then
        echo "$(date) *** Can't extract required tags. Dumping failed, exiting.."
        exit 255
    fi
    #-- the security groups arrive as one space separated string; pass them on as separate words
    read -r -a vpc_sec_groups <<< "$VPCSECGROUP"

    #-- db instance engine
    ENGINE=$(aws --region "$region" rds describe-db-instances --db-instance-identifier "$db" --query 'DBInstances[].Engine' --output text)
    #-- db instance that will be used for dumping
    dumpdb="$DUMPDBPREFIX-$db-$date"
    #-- Copy GROUP tag to the new db instance
    GROUP=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Group'].Value" --output text)
    [[ -n "$GROUP" ]] && group_tag=(--tags "Key=Group,Value=${GROUP}")
    #-- Get list of tables to exclude from the dump
    DUMPEXCLUDETABLES=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='DumpExcludeTables'].Value" --output text)
    #-- Prepare to use with mysqldump
    read -r -a tables <<< "$DUMPEXCLUDETABLES"
    for table in "${tables[@]}"; do
        exclude_tables+=(--ignore-table "$table")
    done
    #-- check if the db instance is encrypted at rest
    ENCRYPTED=$(aws --region "$region" rds describe-db-instances --db-instance-identifier "$db" --query 'DBInstances[].StorageEncrypted' --output text)
    if [[ "$ENCRYPTED" == "True" ]]; then
        #-- db.m3.medium is the minimal class supported for encrypted DBs
        class=db.m3.medium
    else
        class=db.t3.small
    fi
    #-- availability zone: DUMP_AZ if set, otherwise the zone the source instance runs in;
    #-- when neither is known the option is omitted and RDS picks a zone itself
    az=${DUMP_AZ:-}
    if [[ -z "$az" ]]; then
        az=$(aws --region "$region" rds describe-db-instances --db-instance-identifier "$db" --query 'DBInstances[].AvailabilityZone' --output text)
    fi
    [[ -n "$az" && "$az" != "None" ]] && az_opt=(--availability-zone "$az")

    echo "$(date) *** Launching $dumpdb from snapshot $snapid..."
    #-- launch new db instance from the snapshot
    aws --region "$region" rds restore-db-instance-from-db-snapshot --db-instance-identifier "$dumpdb" --db-snapshot-identifier "$snapid" \
        "${az_opt[@]}" --db-instance-class "$class" --db-subnet-group-name "$SUBNETGROUP" \
        --no-auto-minor-version-upgrade "${group_tag[@]}" || exit 255
    INSTANCE_AVAILABILITY="aws --region $region rds describe-db-instances --db-instance-identifier $dumpdb --query 'DBInstances[].DBInstanceStatus' --output text | grep available | wc -l"
    wait_until "$INSTANCE_AVAILABILITY"

    echo "$(date) *** Applying parameters to $dumpdb..."
    aws --region "$region" rds modify-db-instance --db-instance-identifier "$dumpdb" --backup-retention-period 0 \
        --db-parameter-group-name "$PARAMSGROUP" --vpc-security-group-ids "${vpc_sec_groups[@]}" || exit 255
    PARAM_CHANGES="aws --region $region rds describe-db-instances --db-instance-identifier $dumpdb --query 'DBInstances[].DBParameterGroups[].ParameterApplyStatus' --output text | grep -E 'pending-reboot|available' |wc -l"
    wait_until "$PARAM_CHANGES"
    wait_until "$INSTANCE_AVAILABILITY"

    echo "$(date) *** Rebooting $dumpdb..."
    if ! aws --region "$region" rds reboot-db-instance --db-instance-identifier "$dumpdb"; then
        echo "$(date) *** Can't reboot $dumpdb. Trying one more time..."
        aws --region "$region" rds describe-db-instances --db-instance-identifier "$dumpdb"
        echo "$(date) *** Rebooting $dumpdb one more time..."
        if ! aws --region "$region" rds reboot-db-instance --db-instance-identifier "$dumpdb"; then
            echo "$(date) *** Can't reboot 2nd time $dumpdb. Manual intervention required, exiting..."
            aws --region "$region" rds describe-db-instances --db-instance-identifier "$dumpdb"
            /bin/rm -f -- "$LOCK"
            exit 255
        fi
    fi
    wait_until "$INSTANCE_AVAILABILITY"

    #-- get IP address of the new db instance
    ENDPOINT=$(aws --region "$region" rds describe-db-instances --db-instance-identifier "$dumpdb" --query 'DBInstances[].Endpoint.Address' --output text)

    if [[ "$ENGINE" != "mysql" ]]; then
        echo "$(date) *** Dumping of '$ENGINE' instances is not implemented, skipping the dump of $dumpdb..."
    elif [[ "$ENCRYPTED" == "True" ]]; then
        #-- encrypt the dump if the db instance is encrypted at rest
        CONTEXT="$ENC_CONTEXT,Filename=$BACKUPPATH/$db-$date.sql.bz2"
        #-- generate a key for the dump encryption; JSON output is requested explicitly because
        #-- jq can't parse any other output format the CLI may be configured with
        while IFS=":" read -r key value; do
            datakey[$key]="$value"
        done < <(aws kms generate-data-key --key-id "$DATA_KEYID" --encryption-context "$CONTEXT" --key-spec AES_256 --output json \
                 | jq -r "to_entries|map(\"\(.key):\(.value|tostring)\")|.[]")
        if [[ -z "${datakey[Plaintext]}" || -z "${datakey[CiphertextBlob]}" ]]; then
            #-- never fall through to openssl with an empty passphrase
            echo "$(date) *** Can't generate a data key for $db, skipping the dump of $dumpdb..."
        else
            #-- plaintext key is used for encryption and then deleted from memory
            #-- NOTE: the key goes through a command substitution exactly as in the decryption
            #-- command documented in the file header; keep both sides in sync
            KEY=$(echo ${datakey[Plaintext]} | base64 --decode)
            export KEY
            #-- dump, encrypt and compress
            echo "$(date) *** Dumping and encrypting $dumpdb, using $ENDPOINT as an endpoint..."
            export MYSQL_PWD=$PASS
            /usr/bin/mysqldump --opt --events -h "$ENDPOINT" -u "$USER" -A "${exclude_tables[@]}" \
                | bzip2 \
                | /usr/bin/openssl enc -aes-256-cbc -salt -in /dev/stdin -out /dev/stdout -pass env:KEY > "$BACKUPPATH/$db-$date.sql.bz2.enc"
            pipe_rc=("${PIPESTATUS[@]}")
            #-- store the encrypted encryption key along with the dump; use AWS KMS to decrypt it when needed
            echo ${datakey[CiphertextBlob]} > "$BACKUPPATH/$db-$date.key"
            unset KEY
        fi
    else
        #-- no encryption needed - just dump and compress
        echo "$(date) *** Dumping $dumpdb, using $ENDPOINT as an endpoint..."
        export MYSQL_PWD=$PASS
        /usr/bin/mysqldump --opt --events -h "$ENDPOINT" -u "$USER" -A "${exclude_tables[@]}" \
            | bzip2 > "$BACKUPPATH/$db-$date.sql.bz2"
        pipe_rc=("${PIPESTATUS[@]}")
    fi

    #-- the dump is good only if a pipeline was run and every stage of it succeeded
    if (( ${#pipe_rc[@]} > 0 )); then
        dump_ok=1
        for rc in "${pipe_rc[@]}"; do
            (( rc == 0 )) || dump_ok=0
        done
    fi

    if (( dump_ok )); then
        #-- tag the original db instance
        echo "$(date) *** Tagging the original db $ARN..."
        aws --region "$region" rds add-tags-to-resource --resource-name "$ARN" --tags "Key=Dumped,Value=$date"
        #-- tag the snapshot of the original db instance; used later in the code for checking if dumping is finished
        #-- and the snapshot is ready to be deleted
        echo "$(date) *** Tagging original snaphost $snapid..."
        aws --region "$region" rds add-tags-to-resource --resource-name "arn:aws:rds:$region:$accountid:snapshot:$snapid" --tags "Key=Dumped,Value=$date"
    else
        echo "$(date) *** Dump of $db was not completed, the Dumped tag is not set"
    fi

    #-- delete the db instance used for dumping (also after a failed dump, so it doesn't linger)
    echo "$(date) *** Deleting $dumpdb..."
    aws --region "$region" rds delete-db-instance --db-instance-identifier "$dumpdb" --skip-final-snapshot
    #-- save db configuration and tags
    aws --region "$region" rds describe-db-instances --db-instance-identifier "$db" --output json > "$BACKUPPATH/$db-$date.txt"
    aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --output json >> "$BACKUPPATH/$db-$date.txt"
    #-- clean up old dumps; ":?" aborts instead of searching the current directory if the path is empty
    echo "$(date) *** Cleaning up old dumps..."
    find "${BACKUPPATH:?}" -type f -mtime "+$DAYS_DUMPS_RETENTION" -delete

    (( dump_ok ))
}
#-- alert if we're already running
#-- (the lock file name includes the optional DB instance argument, so single-instance runs
#-- don't collide with each other)
LOCK=/tmp/.$(basename "$0").$DBNAME
if [[ -e "$LOCK" ]]; then
    echo "$(date) *** Something went wrong: $(basename "$0") is already running. Manual interaction needed." | mail -s "rds_backup failed" "$ADMIN_EMAIL"
    /bin/rm "$LOCK"
    exit
fi
echo $$ > "$LOCK"

#-- get AWS account ID
accountid=$(aws sts get-caller-identity --output text --query 'Account')
[[ -z "$accountid" ]] && exit 255
#-- no regions given: iterate over all of them (queried with the aws CLI like everything else here)
[[ -z "$regions" ]] && regions=$(aws ec2 describe-regions --query 'Regions[].RegionName' --output text)

for region in $regions; do
    for db in $(aws --region "$region" rds describe-db-instances --query "DBInstances[].DBInstanceIdentifier" --output text); do
        #-- skip other db instances than the one specified in the command line
        [[ -z "$DBNAME" || "$db" == "$DBNAME" ]] || continue
        ARN="arn:aws:rds:$region:$accountid:db:$db"

        #-- don't backup a db instance unless it's tagged with "Backup=yes"
        BACKUP=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Backup'].Value" --output text | grep -i yes)
        [[ -z "$BACKUP" ]] && continue
        echo "$(date) *** Backing up $db in $region..."

        #-- check if the db instance is tagged for dumping (Dump=yes)
        DUMP=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Dump'].Value" --output text | grep -ci yes)

        #-- wait until the db instance is ready for backup
        INSTANCE_AVAILABILITY="aws --region $region rds describe-db-instances --db-instance-identifier $db --query 'DBInstances[].DBInstanceStatus' --output text | grep -E 'available|stopped' | wc -l"
        wait_until "$INSTANCE_AVAILABILITY"
        SNAPID="$PREFIX-$db-$date"

        #-- save DB's subnet group, VPC security group and parameters group in the snapshot tags
        tmp=$(aws --region "$region" rds describe-db-instances --db-instance-identifier "$db" --output json)
        SUBNETGROUP=$(/usr/bin/jq -r '.DBInstances[].DBSubnetGroup.DBSubnetGroupName' <<< "$tmp")
        VPCSECGROUP=$(/usr/bin/jq -r '.DBInstances[].VpcSecurityGroups[].VpcSecurityGroupId' <<< "$tmp" | xargs)
        #-- only the first parameter group is used: a multi-line value fits neither a tag nor an argument
        PARAMSGROUP=$(/usr/bin/jq -r '.DBInstances[].DBParameterGroups[0].DBParameterGroupName // empty' <<< "$tmp")
        #-- GROUP - is a special tag for grouping resources in AWS billing, non-mandatory
        GROUP=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Group'].Value" --output text)

        #-- create db instance snapshot with the Dump tag (if exists)
        snap_tags=("Key=Purgable,Value=yes")
        [[ "$DUMP" == "1" ]] && snap_tags+=("Key=Dump,Value=yes")
        snap_tags+=(
            "Key=PurgeAfter,Value=$DAYS_SS_RETENTION"
            "Key=DBSubnetGroupName,Value=$SUBNETGROUP"
            "Key=DBParameterGroupName,Value=$PARAMSGROUP"
            "Key=VpcSecurityGroupId,Value='$VPCSECGROUP'"
            "Key=Group,Value=$GROUP"
        )
        echo "$(date) *** Creating snapshot $SNAPID of $db..."
        aws --region "$region" rds create-db-snapshot --db-instance-identifier "$db" --db-snapshot-identifier "$SNAPID" \
            --tags "${snap_tags[@]}" || exit 255
        SNAPSHOT_AVAILABILITY="aws --region $region rds describe-db-snapshots --db-snapshot-identifier $SNAPID --query 'DBSnapshots[].Status' --output text | grep available | wc -l"
        wait_until "$SNAPSHOT_AVAILABILITY"

        #-- launch the dumping procedure if the db instance is tagged with "Dump=yes"
        #-- and "makedump" keyword is specified in the command line
        if [[ "$DODUMP" == "makedump" && "$DUMP" == "1" ]]; then
            wait_la
            echo "$(date) *** Dumping $SNAPID of $db..."
            make_dump "$SNAPID" "$db" "$SUBNETGROUP" "$VPCSECGROUP" "$PARAMSGROUP"
        fi
    done

    #-- clean up old snapshots
    echo "$(date) *** Cleaning up..."
    #-- retrieve all manual RDS snapshots
    for snapid in $(aws --region "$region" rds describe-db-snapshots --snapshot-type manual --query "DBSnapshots[].DBSnapshotIdentifier" --output text); do
        ARN="arn:aws:rds:$region:$accountid:snapshot:$snapid"
        #-- delete old snapshots only if there's a tag "Purgable=yes"
        PURGE=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Purgable'].Value" --output text | grep -i yes)
        [[ -z "$PURGE" ]] && continue
        #-- retrieve how many days to keep the snapshot for the snapshot's tag "PurgeAfter"
        PURGEAFTER=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='PurgeAfter'].Value" --output text)
        #-- a snapshot without a numeric PurgeAfter tag is never purged
        [[ "$PURGEAFTER" =~ ^-?[0-9]+$ ]] || continue
        #-- get snapshot's creation time and extract the date
        snap_date=$(aws --region "$region" rds describe-db-snapshots --db-snapshot-identifier "$snapid" --query "DBSnapshots[].SnapshotCreateTime" --output text | cut -f1 -dT)
        #-- calculate days amount since snapshot creation till now
        days_since=$(( ( $(date -ud 'now' +'%s') - $(date -ud "$snap_date" +'%s') )/60/60/24 ))
        #-- nothing to do unless the snapshot is old enough
        [ "$days_since" -ge "$PURGEAFTER" ] || continue

        DUMP=$(aws --region "$region" rds list-tags-for-resource --resource-name "$ARN" --query "TagList[?Key=='Dump'].Value" --output text | grep -ci yes)
        if [[ "$DUMP" == "1" ]]; then
            #-- calculate db instance name used for dumping
            dumpdb=${snapid/#"$PREFIX"/$DUMPDBPREFIX}
            #-- we can't delete a snapshot until the dumping purposes db instance exists
            INSTANCE_DELETED="aws --region $region rds describe-db-instances --query 'DBInstances[].DBInstanceIdentifier' --output text | grep $dumpdb | wc -l"
            wait_till "$INSTANCE_DELETED"
        fi
        #-- purge the snapshot (dumped or not supposed to be dumped)
        echo "$(date) *** Deleting $snapid..."
        aws --region "$region" rds delete-db-snapshot --db-snapshot-identifier "$snapid"
    done
done
/bin/rm "$LOCK"
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment