-
-
Save bkozora/d4f1cf0e5cf26acdd377 to your computer and use it in GitHub Desktop.
# Automated AMI and Snapshot Deletion | |
# | |
# @author Bobby Kozora | |
# | |
# This script will search for all instances having a tag named "Backup" with a value of "Backup". | |
# As soon as we have the instances list, we loop through each instance | |
# and reference the AMIs of that instance. We check that the latest daily backup | |
# succeeded then we store every image that's reached its DeleteOn tag's date for | |
# deletion. We then loop through the AMIs, deregister them and remove all the | |
# snapshots associated with that AMI. | |
import boto3 | |
import collections | |
import datetime | |
import time | |
import sys | |
ec = boto3.client('ec2', 'us-east-1') | |
ec2 = boto3.resource('ec2', 'us-east-1') | |
images = ec2.images.filter(Owners=["self"]) | |
def lambda_handler(event, context): | |
reservations = ec.describe_instances(Filters=[ | |
{ | |
'Name': 'tag-key', | |
'Values': ['backup', 'Backup'] | |
}, | |
]).get('Reservations', []) | |
instances = sum([[i for i in r['Instances']] for r in reservations], []) | |
print("Found %d instances that need evaluated" % len(instances)) | |
to_tag = collections.defaultdict(list) | |
date = datetime.datetime.now() | |
date_fmt = date.strftime('%Y-%m-%d') | |
imagesList = [] | |
# Set to true once we confirm we have a backup taken today | |
backupSuccess = False | |
# Loop through all of our instances with a tag named "Backup" | |
for instance in instances: | |
imagecount = 0 | |
# Loop through each image of our current instance | |
for image in images: | |
# Our other Lambda Function names its AMIs Lambda - i-instancenumber. | |
# We now know these images are auto created | |
if image.name.startswith('Lambda - ' + instance['InstanceId']): | |
# print "FOUND IMAGE " + image.id + " FOR INSTANCE " + instance['InstanceId'] | |
# Count this image's occcurance | |
imagecount = imagecount + 1 | |
try: | |
if image.tags is not None: | |
deletion_date = [ | |
t.get('Value') for t in image.tags | |
if t['Key'] == 'DeleteOn' | |
][0] | |
delete_date = time.strptime(deletion_date, "%m-%d-%Y") | |
except IndexError: | |
deletion_date = False | |
delete_date = False | |
today_time = datetime.datetime.now().strftime('%m-%d-%Y') | |
# today_fmt = today_time.strftime('%m-%d-%Y') | |
today_date = time.strptime(today_time, '%m-%d-%Y') | |
# If image's DeleteOn date is less than or equal to today, | |
# add this image to our list of images to process later | |
if delete_date <= today_date: | |
imagesList.append(image.id) | |
# Make sure we have an AMI from today and mark backupSuccess as true | |
if image.name.endswith(date_fmt): | |
# Our latest backup from our other Lambda Function succeeded | |
backupSuccess = True | |
print("Latest backup from " + date_fmt + " was a success") | |
print("instance " + instance['InstanceId'] + " has " + | |
str(imagecount) + " AMIs") | |
print("=============") | |
print("About to process the following AMIs:") | |
print(imagesList) | |
if backupSuccess == True: | |
myAccount = boto3.client('sts').get_caller_identity()['Account'] | |
snapshots = ec.describe_snapshots(MaxResults=1000, | |
OwnerIds=[myAccount])['Snapshots'] | |
# loop through list of image IDs | |
for image in imagesList: | |
print("deregistering image %s" % image) | |
amiResponse = ec.deregister_image( | |
DryRun=False, | |
ImageId=image, | |
) | |
for snapshot in snapshots: | |
if snapshot['Description'].find(image) > 0: | |
snap = ec.delete_snapshot( | |
SnapshotId=snapshot['SnapshotId']) | |
print("Deleting snapshot " + snapshot['SnapshotId']) | |
print("-------------") | |
else: | |
print("No current backup found. Termination suspended.") |
I found the solution to poor performance and timeouts.
First, I modified the backup script to create a tag of InstanceID on the AMI. This way we can filter the AMIs on theFor instance in instances
loop to only list the AMIs relating to that instance instead of it looping through every AMI in the account (in our account we have over 4000 and 80 instances. So it would loop through 4000 AMIs 80 times!!).Here is the code. See my comment on the bkozora's backup script to see the code inserted to add the tag to instances.
# Loop through all of our instances with a tag named "Backup" for instance in instances: imagecount = 0 instance_ID = instance['InstanceId'] # Loop through each image of our current instance images = ec2.images.filter( Owners=["self"], Filters=[ { 'Name': 'tag:InstanceID', 'Values': [instance_ID , ] }, ], ) for image in images:
When i run the clean up it detects the instances, and is about to process the ones that need to be deleted, but it says no backup found. I noticed if i have more than 10 instances (even though they are not part of my backup tags) it gives me that. Did you run into issues like this?
This is the error msg
instance i-0d92511eef8axxxx has 1 AMIs
About to process the following AMIs:
['ami-25e9xxxx']
No current backup found. Termination suspended.
END RequestId: e27c4424-1ce5-437a-9756-f83e390a6a3e
REPORT RequestId: e27c4424-1ce5-437a-9756-f83e390a6a3e Duration: 1851.28 ms Billed Duration: 1900 ms Memory Size: 128 MB Max Memory Used: 95 MB Init Duration: 515.93 ms
I was also having same issues and found that on line 44 there is a parameter mentioned 'backupSuccess = False' & I made it to 'backupSuccess = True' and Voila!, this script started deregistering AMIs
Thanks,
Rocky
I was also having same issues and found that on line 44 there is a parameter mentioned 'backupSuccess = False' & I made it to 'backupSuccess = True' and Voila!, this script started deregistering AMIs
Thanks,
Rocky
Omg! Thank you! that worked!!!! mine was on line 46, but yes i changed it from False to True and now they started deregistering!
@mcalr3 @bkozora
I have modified my codes to create ami after every two hours
the ami creation is working fine but cleanup isnt working, please check the codes i have mentioned below
getting error
2019-11-20
03:46:35
START RequestId: 2588433b-cf23-4a74-ac4e-5a0d573c8275 Version: $LATEST
03:46:35
Found 2 instances that need evaluated
03:46:35
Present date and time:20-11-2019:03.11.1574221595
03:46:39
instance i-01c761f3ac6d9bf56 has 5 AMIs
03:46:43
instance i-0bfa6653317e482ea has 5 AMIs
03:46:43
03:46:43
About to process the following AMIs:
03:46:43
[]
03:46:43
03:46:43
About to process the following Snapshots associated with above Images:
03:46:43
[]
03:46:43
The timer is started for 5 seconds to wait for images to deregister before deleting the snapshots associated to it
03:46:48
03:46:48
END RequestId: 2588433b-cf23-4a74-ac4e-5a0d573c8275
03:46:48
REPORT RequestId: 2588433b-cf23-4a74-ac4e-5a0d573c8275 Duration: 12531.54 ms Billed Duration: 12600 ms Memory Size: 128 MB Max Memory U
ami creation code
Automated AMI Backups
This code refered with slight changes: https://gist.github.com/bkozora/724e01903a9ad481d21e
This script will search for all instances having a tag with "Backup"
on it and are in 'running' state. As soon as it has the instances list, it loop through each instance
and create an AMI of it. Also, it will look for a "Retention" tag key which
will be used as a retention policy number in days. If there is no tag with
that name, it will use a 4 days default value for each AMI.If there is no tag with that name, it will use a 7 days default value for each AMI.
After creating the AMI it creates a "DeleteOn" tag on the AMI indicating when
it will be deleted using the Retention value and another Lambda function
import boto3
import collections
import datetime
#By the time I used this script, the Lamda is not available in Mumbai region. So, I chosen Singapore region.
#Specify the region in which EC2 Instances located and to create AMI's. Ex: Mumbai region (ap-south-1)
ec = boto3.client('ec2', 'ap-south-1')
#ec = boto3.client('ec2')
def lambda_handler(event, context):
reservations = ec.describe_instances(
Filters=[
{'Name': 'tag:Hourly', 'Values': ['twohours']},
{ 'Name': 'instance-state-name','Values': ['running'] }
]
).get(
'Reservations', []
)
instances = sum(
[
[i for i in r['Instances']]
for r in reservations
], [])
print "Found %d instances that need backing up" % len(instances)
to_tag = collections.defaultdict(list)
for instance in instances:
print "Instance name:" + [res['Value'] for res in instance['Tags'] if res['Key'] == 'Name'][0]
#Default retention for 7 days if the tag is not specified
try:
retention_days = [
int(t.get('Value')) for t in instance['Tags']
if t['Key'] == 'Retention'][0]
except IndexError:
retention_days = 1
except ValueError:
retention_days = 1
except Exception as e:
retention_days = 1
finally:
create_time = datetime.datetime.now()
create_fmt = create_time.strftime('%d-%m-%Y.%H.%M.%S')
#create_fmt = create_time.strftime('%d-%m-%Y')
try:
#Check for instance in running state
# if(ec.describe_instance_status(InstanceIds=[instance['InstanceId']],Filters=[{ 'Name': 'instance-state-name','Values': ['running'] }])['InstanceStatuses'][0]['InstanceState']['Name'] == 'running'):
#To make sure instance NoReboot enabled and to name the AMI
AMIid = ec.create_image(InstanceId=instance['InstanceId'], Name="Lambda-2hour - " + [result['Value'] for result in instance['Tags'] if result['Key'] == 'Name'][0] + " - " + " From " + create_fmt, Description="Lambda created AMI of instance " + instance['InstanceId'], NoReboot=True, DryRun=False)
to_tag[retention_days].append(AMIid['ImageId'])
print "Retaining AMI %s of instance %s for %d days" % (
AMIid['ImageId'],
instance['InstanceId'],
retention_days,
)
for retention_days in to_tag.keys():
delete_date = datetime.date.today() + datetime.timedelta(days=retention_days)
delete_fmt = delete_date.strftime('%d-%m-%Y')
print "Will delete %d AMIs on %s" % (len(to_tag[retention_days]), delete_fmt)
#To create a tag to an AMI when it can be deleted after retention period expires
ec.create_tags(
Resources=to_tag[retention_days],
Tags=[
{'Key': 'DeleteOn', 'Value': delete_fmt},
]
)
#If the instance is not in running state
except IndexError as e:
print "Unexpected error, instance "+[res['Value'] for res in instance['Tags'] if res['Key'] == 'Name'][0]+"-"+"\""+instance['InstanceId']+"\""+" might be in the state other then 'running'. So, AMI creation skipped."
cleanup code
Automated AMI and Snapshot Deletion
This code refered with slight changes: https://gist.github.com/bkozora/d4f1cf0e5cf26acdd377
This script will search for all instances having a tag with "Backup"
on it. As soon as it has the instances list, it loop through each instance
and reference the AMIs of that instance. It check that the latest daily backup
succeeded then it store every image that's reached its DeleteOn tag's date for
deletion. It then loop through the AMIs, deregister them and remove all the
snapshots associated with that AMI.
import boto3
import collections
import datetime
import time
import sys
#specify the region in which EC2 Instances located and to cleanup AMI's. Ex: Mumbai region (ap-south-1)
ec = boto3.client('ec2', 'ap-south-1')
ec2 = boto3.resource('ec2', 'ap-south-1')
images = ec2.images.filter(Owners=["XXXXX"])# Specify your AWS account owner id in place of "XXXXX" at all the places in this script
def lambda_handler(event, context):
reservations = ec.describe_instances(
Filters=[
{'Name': 'tag:Hourly', 'Values': ['twohours']},
{ 'Name': 'instance-state-name','Values': ['running'] }
]
).get(
'Reservations', []
)
instances = sum(
[
[i for i in r['Instances']]
for r in reservations
], [])
print "Found %d instances that need evaluated" % len(instances)
to_tag = collections.defaultdict(list)
date = datetime.datetime.now()
date_fmt = date.strftime('%d-%m-%Y')
print "Present date and time:" + date.strftime('%d-%m-%Y:%H.%m.%s')
imagesList = []
# Set to true once we confirm we have a backup taken today
backupSuccess = True
# Loop through all of our instances with a tag named "Backup"
for instance in instances:
imagecount = 0
# Loop through each image of our current instance
for image in images:
# Our other Lambda Function names its AMIs Lambda - Instance Name.
# We now know these images are auto created
if image.name.startswith('Lambda - ' + [result['Value'] for result in instance['Tags'] if result['Key'] == 'Name'][0]):
#if image.name.startswith('Lambda ec2 tag - ' + [result['Value'] for result in instance['Tags'] if result['Key'] == 'Name'][0]):
#print "FOUND IMAGE " + image.id + " FOR INSTANCE " + instance['InstanceId']
# Count this image's occcurance
imagecount = imagecount + 1
try:
if image.tags is not None:
deletion_date = [
t.get('Value') for t in image.tags
if t['Key'] == 'DeleteOn'][0]
delete_date = time.strptime(deletion_date, "%d-%m-%Y")
except IndexError:
deletion_date = False
delete_date = False
today_time = datetime.datetime.now().strftime('%d-%m-%Y')
# today_fmt = today_time.strftime('%m-%d-%Y')
today_date = time.strptime(today_time, '%d-%m-%Y')
# If image's DeleteOn date is less than or equal to today,
# add this image to our list of images to process later
if delete_date <= today_date:
imagesList.append(image.id)
# Make sure we have an AMI from today and mark backupSuccess as true
if image.name.endswith(date_fmt):
# Our latest backup from our other Lambda Function succeeded
backupSuccess = True
print "Latest backup from " + date_fmt + " was a success"
print "instance " + instance['InstanceId'] + " has " + str(imagecount) + " AMIs"
print "============="
print "About to process the following AMIs:"
print imagesList
if backupSuccess == True:
snapshotList = []
for image in imagesList:
#print image
desc_image_snapshots = ec.describe_images(ImageIds=[image],Owners=['XXXXX',])['Images'][0]['BlockDeviceMappings']
# print (desc_image_snapshots)
try:
for desc_image_snapshot in desc_image_snapshots:
snapshot = ec.describe_snapshots(SnapshotIds=[desc_image_snapshot['Ebs']['SnapshotId'],], OwnerIds=['XXXXX'])['Snapshots'][0]
#if snapshot['Description'].find(image) > 0:
snapshotList.append(snapshot['SnapshotId'])
#else:
# continue
# print "Snapshot is not associated with an AMI"
except Exception as e:
print "Ignore Index Error:%s" % e.message
print "Deregistering image %s" % image
amiResponse = ec.deregister_image(
DryRun=False,
ImageId=image,
)
print "============="
print "About to process the following Snapshots associated with above Images:"
print (snapshotList)
print "The timer is started for 5 seconds to wait for images to deregister before deleting the snapshots associated to it"
time.sleep(5)# This should be set to higher value if the image in the imagesList takes more time to deregister
for snapshot in snapshotList:
try:
snap = ec.delete_snapshot(SnapshotId=snapshot)
print "Deleted snapshot " + snapshot
except Exception as e:
print "%s" % e.message
print "-------------"
else:
print "No current backup found. Termination suspended."
Hi,
I tried running the code with python 3.6 but it is throwing following error.
=======Error Message===========
{
"errorMessage": "'<=' not supported between instances of 'bool' and 'time.struct_time'",
"errorType": "TypeError",
"stackTrace": [
" File "/var/task/lambda_function.py", line 66, in lambda_handler\n if delete_date <= today_date:\n"
]
}
@mohanish12 For those of you who changed line 65 in the backup function script:
create_fmt = create_time.strftime('%Y-%m-%d')
To
create_fmt = create_time.strftime('%Y-%m-%d--%H-%M-%S')
The cleanup function is looking for
if image.name.endswith(date_fmt):
on line 86. Since the strftime variable does not now end with today's date (now time), then it will throw error.My workaround was to put the time in front of the date, like this:
create_fmt = create_time.strftime('%H-%M-%S(UTC)--on--%Y-%m-%d')
Hope this helps someone.
Thank you so much for such solution.
Here is the cleanup code & the output: It able to detect the instance but not able to delete the ami created today, even I changed backupSuccess = True
Test Event Name
test
Response
null
Function Logs
START RequestId: 3f72e0b7-59bb-433e-a46d-4f097c8acb45 Version: $LATEST
Found 1 instances that need evaluated
instance i-0acbe212ea7c31c7e has 0 AMIs
About to process the following AMIs:
[]
END RequestId: 3f72e0b7-59bb-433e-a46d-4f097c8acb45
import boto3
import collections
import datetime
import time
import sys
ec = boto3.client('ec2', 'ap-south-1')
ec2 = boto3.resource('ec2', 'ap-south-1')
images = ec2.images.filter(Owners=["self"])
def lambda_handler(event, context):
reservations = ec.describe_instances(Filters=[
{
'Name': 'tag-key',
'Values': ['backup', 'Backup']
},
]).get('Reservations', [])
instances = sum([[i for i in r['Instances']] for r in reservations], [])
print("Found %d instances that need evaluated" % len(instances))
to_tag = collections.defaultdict(list)
date = datetime.datetime.now()
date_fmt = date.strftime('%Y-%m-%d')
imagesList = []
# Set to true once we confirm we have a backup taken today
backupSuccess = True
# Loop through all of our instances with a tag named "Backup"
for instance in instances:
imagecount = 0
# Loop through each image of our current instance
for image in images:
# Our other Lambda Function names its AMIs Lambda - i-instancenumber.
# We now know these images are auto created
if image.name.startswith('Lambda - ' + instance['InstanceId']):
# print "FOUND IMAGE " + image.id + " FOR INSTANCE " + instance['InstanceId']
# Count this image's occcurance
imagecount = imagecount + 1
try:
if image.tags is not None:
deletion_date = [
t.get('Value') for t in image.tags
if t['Key'] == 'DeleteOn'
][0]
delete_date = time.strptime(deletion_date, "%m-%d-%Y")
except IndexError:
deletion_date = False
delete_date = False
today_time = datetime.datetime.now().strftime('%m-%d-%Y')
# today_fmt = today_time.strftime('%m-%d-%Y')
today_date = time.strptime(today_time, '%m-%d-%Y')
# If image's DeleteOn date is less than or equal to today,
# add this image to our list of images to process later
if delete_date <= today_date:
imagesList.append(image.id)
# Make sure we have an AMI from today and mark backupSuccess as true
if image.name.endswith(date_fmt):
# Our latest backup from our other Lambda Function succeeded
backupSuccess = True
print("Latest backup from " + date_fmt + " was a success")
print("instance " + instance['InstanceId'] + " has " +
str(imagecount) + " AMIs")
print("=============")
print("About to process the following AMIs:")
print(imagesList)
if backupSuccess == True:
myAccount = boto3.client('sts').get_caller_identity()['Account']
snapshots = ec.describe_snapshots(MaxResults=1000,
OwnerIds=[myAccount])['Snapshots']
# loop through list of image IDs
for image in imagesList:
print("deregistering image %s" % image)
amiResponse = ec.deregister_image(
DryRun=False,
ImageId=image,
)
for snapshot in snapshots:
if snapshot['Description'].find(image) > 0:
snap = ec.delete_snapshot(
SnapshotId=snapshot['SnapshotId'])
print("Deleting snapshot " + snapshot['SnapshotId'])
print("-------------")
else:
print("No current backup found. Termination suspended.")
Hi
I have code to have the retention backup days for 7, however, how to exclude the weekends snapshots for the last 35 days in deletion.
I want to have the snapshots for last 7 days and last 5 weekend Saturdays snapshots without using tags.
Kindly let me know the code if possible
@bkozora, I tried to run updated code but still not works. pls find enclosed for your reference