Last active
November 5, 2019 10:28
-
-
Save jesperalmstrom/8c96d016e3ecfd0d79afccfe9b05ccf9 to your computer and use it in GitHub Desktop.
Remove deprecated Cost and Usage Report objects from S3 bucket
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# Usage for an AWS test profile:
# python scriptname.py test | less
import boto3 | |
import botocore.session | |
import argparse | |
import json | |
# ANSI escape sequences used to colorize terminal output
class bcolors:
    """Namespace of ANSI escape sequences for styling terminal output.

    Each attribute is a plain string; wrap text as
    ``f"{bcolors.OKGREEN}text{bcolors.ENDC}"`` so the style is reset afterwards.
    """

    HEADER = '\033[95m'     # escape code 95
    OKBLUE = '\033[94m'     # escape code 94
    OKGREEN = '\033[92m'    # escape code 92
    WARNING = '\033[93m'    # escape code 93
    FAIL = '\033[91m'       # escape code 91
    ENDC = '\033[0m'        # escape code 0: reset all attributes
    BOLD = '\033[1m'        # escape code 1
    UNDERLINE = '\033[4m'   # escape code 4
def _str_to_bool(value):
    """Convert a command-line string such as "False" or "true" into a bool.

    argparse's ``type=bool`` must not be used for this: ``bool("False")`` is
    ``True`` because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a recognised boolean word.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'expected True or False, got {value!r}')


parser = argparse.ArgumentParser()
parser.add_argument("profile", type=str, help="The aws profile to use")
parser.add_argument('--dryrun', type=_str_to_bool, default=True, help="Enter --dryrun False to accutally delete cur objects")
args = parser.parse_args()
# True (the default) means "only report"; objects are deleted solely when the
# user explicitly passes `--dryrun False`.
dryrun = args.dryrun

# Make boto3 use the requested profile for every client/resource created below
boto3.setup_default_session(profile_name=args.profile)
cur_bucket = 'bucket.aws.cost.report'  # Replace with your bucket
cur_prefix = 'cur/Cost_and_Usage_report/'  # Replace with cur prefix
manifest_file = 'Cost_and_Usage_report-Manifest.json'  # Name of the manifest file that reside in the current monthly data

s3 = boto3.resource('s3')
s3client = boto3.client('s3')
paginator = s3client.get_paginator('list_objects_v2')

# With Delimiter='/', each "sub-folder" directly under cur_prefix is returned
# as one entry in CommonPrefixes.
for result in paginator.paginate(Bucket=cur_bucket, Prefix=cur_prefix, Delimiter='/'):
    # A page without sub-folders has no 'CommonPrefixes' key at all.
    for prefix in result.get('CommonPrefixes', []):
        report_prefix = prefix.get('Prefix')
        manifest_obj = report_prefix + manifest_file
        print(f'{bcolors.OKGREEN}{bcolors.BOLD}Try to find manifest - {manifest_obj}{bcolors.ENDC}')
        try:
            response = s3client.get_object(Bucket=cur_bucket, Key=manifest_obj)
            manifest_file_content = response['Body'].read().decode()
            manifest_dict = json.loads(manifest_file_content)
            # Keys containing the manifest's assemblyId are the ones to keep.
            folder_to_keep = manifest_dict['assemblyId']
            print(f'{bcolors.OKGREEN} This is the folder to keep {folder_to_keep}{bcolors.ENDC}')
            # Paginate: a single list call returns at most 1000 keys, and an
            # empty listing carries no 'Contents' key.
            for page in paginator.paginate(Bucket=cur_bucket, Prefix=report_prefix):
                for f in page.get('Contents', []):
                    filename = f['Key']
                    # Keep everything under the current assembly, plus any key
                    # that is a substring of the manifest key (the manifest itself).
                    if folder_to_keep not in filename and filename not in manifest_obj:
                        if not dryrun:
                            print(f'{bcolors.OKBLUE}Removing file {filename}{bcolors.ENDC}')
                            # WARNING this is the point of no return
                            s3client.delete_object(Bucket=cur_bucket, Key=filename)
                        else:
                            print(f'{bcolors.OKGREEN}Dry run: {filename} not deleted{bcolors.ENDC}')
                    else:
                        print(f"{bcolors.WARNING}NOT deleting {filename}{bcolors.ENDC}")
        except Exception as e:
            # Best effort per sub-folder: report the problem (missing manifest,
            # bad JSON, AWS error) and continue with the next one.
            print(bcolors.FAIL + str(e) + bcolors.ENDC)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment