Created
January 27, 2019 19:45
-
-
Save andrewgross/6479353aef6917918ad000b8176ec062 to your computer and use it in GitHub Desktop.
This script helps set the permissions needed to read S3 Inventory data from an account that does not own the bucket. It assumes that you have a ROLE_ARN that can assume a role in the main account, and that this role has read permission as well as read/write Object ACL permissions on your S3 bucket.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import json

import boto3
# Placeholder configuration: replace each value before running the script.

# Name of the bucket passed to the S3 calls below.
BUCKET_NAME = "<s3_bucket_name>"

# Should have data/, hive/, and some dated folders inside of it
INVENTORY_PREFIX = "<prefix_given_to_s3_inventory>"

# Account which is not the owner of S3 bucket, but trying to access it.
# Controls ROLE_ARN
ACCOUNT_CUID = "<your_canonical_user_id_for_cross_account>"

# Role that role_arn_to_session() assumes through STS.
ROLE_ARN = "<role_in_cross_account_that_can_assume_to_main_account>"
def role_arn_to_session(role_arn):
    """Assume ``role_arn`` through STS and return a boto3 session for it.

    The session is built from the temporary access key, secret key and
    session token found under ``Credentials`` in the ``assume_role``
    response. The role session is always named "S3InventoryCleanup".
    """
    sts = boto3.client('sts')
    credentials = sts.assume_role(
        RoleArn=role_arn,
        RoleSessionName="S3InventoryCleanup",
    )['Credentials']
    return boto3.Session(
        aws_access_key_id=credentials['AccessKeyId'],
        aws_secret_access_key=credentials['SecretAccessKey'],
        aws_session_token=credentials['SessionToken'],
    )
# Module-level setup: assume ROLE_ARN once at import time and derive the S3
# handles that the functions below share.
session = role_arn_to_session(ROLE_ARN)
# Low-level client, used to download the inventory manifest.
s3_client = session.client('s3', region_name='us-east-1')
# Resource interface, used to read and write object ACLs.
s3_resource = session.resource('s3', region_name='us-east-1')
def _get_latest_inventory_date_prefix():
    """Return the dated folder name of the latest inventory run.

    Inventory dates are run for "yesterday" from the perspective of S3
    Inventory, so the date part is yesterday's UTC date.

    Returns:
        A string of the form ``YYYY-MM-DDT04-00Z``.
    """
    # Timezone-aware "now"; datetime.utcnow() is deprecated as of Python 3.12.
    now = datetime.datetime.now(datetime.timezone.utc)
    yesterday = now - datetime.timedelta(days=1)
    # The hard-coded 04-00Z time might be different depending on when you
    # set s3 inventory to run.
    return yesterday.strftime("%Y-%m-%dT04-00Z")
def get_manifest_file_location(prefix):
    """Build the S3 key of the latest inventory ``manifest.json``.

    Args:
        prefix: Key prefix under which the dated inventory folders live.
            Trailing slashes are ignored.

    Returns:
        ``<prefix>/<dated folder>/manifest.json``, where the dated folder
        comes from ``_get_latest_inventory_date_prefix()``.
    """
    date_folder = _get_latest_inventory_date_prefix()
    # Strip trailing slashes so a prefix such as "inventory/" does not yield
    # "inventory//<date>/manifest.json", which S3 treats as a different key.
    return "/".join([prefix.rstrip("/"), date_folder, "manifest.json"])
def get_inventory_file_list(bucket, prefix):
    """Return the object keys listed in the latest inventory manifest.

    Downloads the manifest found at ``get_manifest_file_location(prefix)``
    in ``bucket``, decodes it as UTF-8 JSON, and collects the ``key`` of
    every entry under its ``files`` field.
    """
    manifest_key = get_manifest_file_location(prefix)
    response = s3_client.get_object(Bucket=bucket, Key=manifest_key)
    raw_manifest = response['Body'].read()
    parsed = json.loads(raw_manifest.decode("utf-8"))
    keys = []
    for entry in parsed.get('files'):
        keys.append(entry['key'])
    return keys
def _has_grant(grants, cuid):
    """Return True if any grant in ``grants`` names ``cuid`` as grantee ID.

    Each grant is a dict; only its ``Grantee`` -> ``ID`` value is compared,
    and a grant without a ``Grantee`` entry is treated as having no ID.
    """
    return any(
        entry.get('Grantee', {}).get('ID') == cuid
        for entry in grants
    )
def set_permissions(bucket, files):
    """Grant ACCOUNT_CUID FULL_CONTROL on each object key in ``files``.

    For every key the object's current owner and grants are read. Objects
    whose grants already name ACCOUNT_CUID are skipped; for the rest a
    CanonicalUser FULL_CONTROL grant is appended to the existing grants
    and the ACL is written back with the original owner.
    """
    for object_key in files:
        acl_resource = s3_resource.ObjectAcl(bucket, object_key)
        current_owner = acl_resource.owner
        current_grants = acl_resource.grants
        if _has_grant(current_grants, ACCOUNT_CUID):
            # Grant already present, nothing to write for this object.
            continue
        current_grants.append({
            'Grantee': {
                'ID': ACCOUNT_CUID,
                'Type': 'CanonicalUser',
            },
            'Permission': 'FULL_CONTROL',
        })
        acl_resource.put(AccessControlPolicy={
            'Grants': current_grants,
            'Owner': current_owner,
        })
def get_latest_inventory_s3_files(bucket, prefix):
    """Return ``s3://`` URLs for the files of the latest inventory.

    Looks up the keys via ``get_inventory_file_list``, passes them to
    ``set_permissions`` for the same bucket, and then formats each key as
    ``s3://<bucket>/<key>``.
    """
    inventory_keys = get_inventory_file_list(bucket, prefix)
    set_permissions(bucket, inventory_keys)
    urls = []
    for inventory_key in inventory_keys:
        urls.append("s3://{}/{}".format(bucket, inventory_key))
    return urls
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment