Created
October 24, 2019 04:52
-
-
Save heykarimoff/bdd31d42ef12e6c6a92c218681ad5955 to your computer and use it in GitHub Desktop.
How to Recover Deleted Files in AWS S3 Bucket
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from datetime import datetime, timezone | |
import boto3 | |
# ###################################### | |
# | |
# Remove delete markers from objects in a versioned bucket so the deleted objects become visible again.
# | |
# ###################################### | |
# ----------------------------------- | |
# Enter these values here: | |
# NOTE: prefer supplying credentials through the environment or an AWS profile
# rather than committing real keys to this file.
thebucket = "<bucket_name>"
access_key = "<access_key>"
secret_key = "<secret_key>"
region_name = "<region_name>"  # e.g. us-east-1
# ------------------------------------

# Resource handle: used to address individual object versions for deletion.
s3 = boto3.resource(
    "s3",
    region_name=region_name,
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
)
# Low-level client: used for the list_object_versions paginator. It is given
# the same region as the resource so both talk to the same endpoint.
s3client = boto3.client(
    "s3",
    region_name=region_name,
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
)

# NOTE: "MaxItems" caps the TOTAL number of entries the paginator returns
# across all pages; it is not a per-page size. Raise this value if the prefix
# holds more than 100000 versions / delete markers.
page_size = 100000
folder_in_thebucket = "files/invoices"
paginator = s3client.get_paginator("list_object_versions")
pageresponse = paginator.paginate(
    Bucket=thebucket,
    Prefix=folder_in_thebucket,
    PaginationConfig={"MaxItems": page_size},
)

# Only delete markers created after this instant (UTC) are removed.
deleted_at = datetime(2019, 10, 22, 20, 0, 0, tzinfo=timezone.utc)
def restore_all(pages):
    """Restore every object whose current version is a recent delete marker.

    Args:
        pages: Iterable of ``list_object_versions`` response pages (dicts).
            Pages that carry no "DeleteMarkers" entry are skipped.

    A marker is handed to ``restore`` only when it is the latest version of
    its key and its "LastModified" is later than the module-level
    ``deleted_at`` cutoff.
    """
    for page in pages:
        # A page without delete markers simply omits the key.
        for marker in page.get("DeleteMarkers", []):
            if marker["IsLatest"] and marker["LastModified"] > deleted_at:
                restore(marker)
def restore(delete_marker):
    """Delete a single delete marker, which recovers the file it was hiding.

    Args:
        delete_marker: One entry from a page's "DeleteMarkers" list; its
            "Key" and "VersionId" identify the marker to remove.
    """
    # Address the marker itself as an object version of the bucket.
    marker_version = s3.ObjectVersion(
        thebucket, delete_marker["Key"], delete_marker["VersionId"]
    )
    # Progress output so the run can be followed on the console.
    print(f"Restoring {delete_marker}")
    # Removing the delete marker is what brings the file back.
    marker_version.delete()
if __name__ == "__main__":
    # Announce the cutoff and target location, then process every listed page.
    print(f"Restoring files deleted after {deleted_at} in {thebucket}/{folder_in_thebucket}.")
    restore_all(pageresponse)
I need code to recover objects from buckets that have versioning disabled.
That's not something you can do
Hi, I have a bucket where the images were deleted recently; I think my account was compromised. I do not have versioning enabled. Can we recover our images?
I created an improved version (gist) that prints a human-readable time and asks for confirmation before deleting each delete marker.
from datetime import datetime, timezone
import boto3
# -----------------------------------
# Enter these values here:
thebucket = "BUCKET_NAME_HERE"
region_name = "us-east-1"
folder_in_bucket = ""  # aka key or prefix; leave empty to scan the whole bucket
# ------------------------------------
# can optionally write these values if they are not in environment
# access_key = "<access_key>"
# secret_key = "<secret_key>"
# s3 = boto3.resource("s3", region_name=region_name, aws_access_key_id=access_key, aws_secret_access_key=secret_key)
# s3client = boto3.client("s3", aws_access_key_id=access_key, aws_secret_access_key=secret_key)
s3 = boto3.resource("s3", region_name=region_name)
s3client = boto3.client("s3")

# NOTE: "MaxItems" caps the TOTAL number of entries the paginator returns
# across all pages; it is not a per-page size.
page_size = 100000
paginator = s3client.get_paginator("list_object_versions")
pageresponse = paginator.paginate(
    Bucket=thebucket,
    # Restrict the listing to the configured folder; previously the prefix was
    # only printed in the banner and the whole bucket was scanned.
    Prefix=folder_in_bucket,
    PaginationConfig={"MaxItems": page_size},
)

# Only delete markers created after this instant (UTC) are offered for restore.
deleted_at = datetime(2019, 10, 22, 20, 0, 0, tzinfo=timezone.utc)
def time_ago(time=False):
    """Return a coarse, human-readable description of how long ago *time* was.

    Args:
        time: One of
            * an ``int`` POSIX timestamp (seconds since the epoch),
            * a timezone-aware ``datetime`` (it is subtracted from an aware
              UTC "now", so a naive datetime raises ``TypeError``),
            * a falsy value (the default), meaning "now".

    Returns:
        A phrase such as "just now", "5 hours ago", "Yesterday" or
        "2 weeks ago"; the empty string when *time* lies in the future.

    Raises:
        ValueError: If *time* is a truthy value of an unsupported type.
    """
    now = datetime.now(timezone.utc)
    # bool is a subclass of int but is not a timestamp, so exclude it here.
    if isinstance(time, int) and not isinstance(time, bool):
        # Build an aware UTC datetime: a naive one cannot be subtracted from
        # the aware ``now`` above.
        diff = now - datetime.fromtimestamp(time, timezone.utc)
    elif isinstance(time, datetime):
        diff = now - time
    elif not time:
        diff = now - now
    else:
        raise ValueError(f"invalid date {time} of type {type(time)}")

    second_diff = diff.seconds
    day_diff = diff.days

    # A negative day count means the moment is in the future.
    if day_diff < 0:
        return ""

    if day_diff == 0:
        if second_diff < 10:
            return "just now"
        if second_diff < 60:
            return f"{round(second_diff)} seconds ago"
        if second_diff < 120:
            return "a minute ago"
        if second_diff < 3600:
            return f"{round(second_diff / 60)} minutes ago"
        if second_diff < 7200:
            return "an hour ago"
        if second_diff < 86400:
            return f"{round(second_diff / 3600)} hours ago"

    if day_diff == 1:
        return "Yesterday"
    if day_diff < 7:
        return f"{round(day_diff)} days ago"
    if day_diff < 31:
        return f"{round(day_diff / 7)} weeks ago"
    if day_diff < 365:
        return f"{round(day_diff / 30)} months ago"
    return f"{round(day_diff / 365)} years ago"
def restore_all(pages):
    """Walk the listing pages and offer each recent, current delete marker for restore.

    Args:
        pages: Iterable of ``list_object_versions`` response pages (dicts).

    Every delete marker's key is printed. Markers that are the latest version
    of their key and newer than the module-level ``deleted_at`` cutoff are
    additionally reported with their age and passed to ``restore``.
    """
    for response_page in pages:
        # Pages without any delete markers have nothing to process.
        if "DeleteMarkers" not in response_page.keys():
            continue
        for marker in response_page["DeleteMarkers"]:
            print(f"\nπ {marker['Key']}")
            is_current = marker["IsLatest"] is True
            if not (is_current and marker["LastModified"] > deleted_at):
                continue
            deleted_when = time_ago(marker["LastModified"])
            print(f" This was deleted {deleted_when}")
            restore(marker)
def restore(delete_marker):
    """Ask for confirmation, then delete the given delete marker.

    Args:
        delete_marker: One entry from a page's "DeleteMarkers" list; its
            "Key" and "VersionId" identify the marker to remove.

    Any reply other than "y" or "Y" (including an empty reply) skips the file.
    """
    reply = input(" Would you like to restore this file? [y/N] ") or "N"
    if reply not in ("y", "Y"):
        print(" π Skipping...")
        return
    print(" π± Restoring...")
    # Deleting the marker version is what brings the file back.
    s3.ObjectVersion(thebucket, delete_marker["Key"], delete_marker["VersionId"]).delete()
if __name__ == "__main__":
    # Describe the cutoff in relative terms, then process every listed page.
    cutoff_description = time_ago(deleted_at)
    print(f"Restoring files deleted from {cutoff_description} in πͺ£ {thebucket}/{folder_in_bucket}")
    restore_all(pageresponse)
Example output (the word "restore" is missing from the question here, but that has since been fixed):
Much appreciated, you saved my day.
Saved my day. Thank you so much!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I need code to recover objects from buckets that have versioning disabled.