Created
February 11, 2019 04:27
-
-
Save christippett/ef8946346275ebe01997a69d2c58025f to your computer and use it in GitHub Desktop.
Migrate GCS bucket(s) from one project/location to another
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import uuid | |
from google.cloud import storage | |
from google.cloud.storage import blob, bucket | |
# Names of the buckets that should be migrated.
BUCKETS = ["bucket-a", "bucket-b"]

# Destination project and location for the re-created buckets.
NEW_BUCKET_PROJECT = "example-project"
NEW_BUCKET_LOCATION = "australia-southeast1"

# Shared Cloud Storage client used when looking up or creating buckets.
client = storage.Client()
class GoogleCloudStorageMigrationHelper:
    """Move GCS buckets to another project/location while keeping their names.

    For every bucket the helper copies all objects into a staging bucket
    (under a ``<bucket-name>/`` prefix), deletes the original bucket,
    re-creates a bucket with the same name in the new project/location and
    finally copies the staged objects back into it.
    """

    def __init__(
        self, buckets_to_migrate, new_location, new_project, staging_bucket_name=None
    ):
        """Store the migration settings.

        Args:
            buckets_to_migrate: Iterable of bucket names to migrate.
            new_location: Location the re-created buckets are created in.
            new_project: Project the re-created buckets are created in.
            staging_bucket_name: Name of the intermediate bucket. When falsy,
                a name of the form ``gcs-migration-stage-<8 chars>`` is
                generated from a random UUID.
        """
        self.buckets_to_migrate = buckets_to_migrate
        self.new_location = new_location
        self.new_project = new_project
        if not staging_bucket_name:
            staging_bucket_name = "gcs-migration-stage-" + str(uuid.uuid4())[:8]
        self.staging_bucket_name = staging_bucket_name

    def migrate(self):
        """Migrate every configured bucket via the staging bucket.

        Buckets whose location already equals ``new_location`` (compared
        case-insensitively) are skipped. The staging bucket is left in place
        when the method returns.
        """
        staging_bucket = self.get_or_create_bucket(self.staging_bucket_name)
        for source_bucket_name in self.buckets_to_migrate:
            source_bucket = self.get_or_create_bucket(source_bucket_name)
            # Skip migration if bucket already in new location
            if (
                source_bucket.location
                and source_bucket.location.lower() == self.new_location.lower()
            ):
                continue
            # TODO: check if bucket already in new project
            # TODO: migrate bucket labels
            self.copy_objects_to_staging_bucket(
                source_bucket=source_bucket, target_bucket=staging_bucket
            )
            # The original bucket is removed before a bucket with the same
            # name is created in the new project/location.
            self.delete_bucket(source_bucket)
            new_bucket = self.get_or_create_bucket(
                source_bucket.name, project=self.new_project, location=self.new_location
            )
            self.copy_objects_from_staging_bucket(
                source_bucket=staging_bucket, target_bucket=new_bucket
            )
        # Clean up is intentionally not automatic: the staging bucket is kept
        # so the copied data can be checked. Remove it afterwards with
        # ``self.delete_bucket(staging_bucket)``.

    def get_or_create_bucket(self, bucket_id, **create_kwargs):
        """Return the bucket named ``bucket_id``, creating it if it is missing.

        Args:
            bucket_id: Name of the bucket.
            **create_kwargs: Passed to ``Bucket.create`` when the bucket does
                not exist yet (``migrate`` passes ``project``/``location``).

        Returns:
            The bucket object, reloaded so its properties are populated.
        """
        bucket_ = bucket.Bucket(client, bucket_id)
        if not bucket_.exists():
            print(f"Creating bucket: {bucket_.name}")
            bucket_.create(**create_kwargs)
        bucket_.reload()
        return bucket_

    def copy_objects_to_staging_bucket(self, source_bucket, target_bucket):
        """Copy every object of ``source_bucket`` into ``target_bucket``.

        Staged objects are named ``<source bucket name>/<object name>`` so
        several buckets can share one staging bucket.
        """
        for blob_obj in source_bucket.list_blobs():
            object_name = source_bucket.name + "/" + blob_obj.name
            self._rewrite_blob(blob_obj, object_name, target_bucket)

    def copy_objects_from_staging_bucket(self, source_bucket, target_bucket):
        """Copy the objects staged for ``target_bucket`` back into it.

        Args:
            source_bucket: The staging bucket.
            target_bucket: The re-created bucket; its name is the staging
                prefix that gets stripped from the object names.
        """
        # The trailing "/" matters: without it the prefix "bucket-a" would
        # also match objects staged for e.g. "bucket-ab".
        staging_prefix = target_bucket.name + "/"
        for blob_obj in source_bucket.list_blobs(prefix=staging_prefix):
            _, _, object_name = blob_obj.name.partition("/")
            self._rewrite_blob(blob_obj, object_name, target_bucket)

    def _rewrite_blob(self, blob_obj, target_name, target_bucket):
        """Copy ``blob_obj`` to ``target_name`` in ``target_bucket``.

        Objects that already exist in the target are skipped, which lets an
        interrupted migration be resumed.
        """
        new_blob_obj = target_bucket.blob(target_name)
        if new_blob_obj.exists():
            return
        print(f"Copying object: {target_name}")
        # A single rewrite call may copy only part of a large object; it then
        # returns a token that must be passed back until it comes back None.
        token = None
        while True:
            token, _, _ = new_blob_obj.rewrite(blob_obj, token=token)
            if token is None:
                break

    def delete_bucket(self, bucket):
        """Delete ``bucket`` together with all objects in it, if it exists."""
        if bucket.exists():
            print(f"Deleting bucket: {bucket.name}")
            # ``Bucket.delete(force=True)`` refuses buckets holding more than
            # 256 objects, so empty the bucket explicitly first.
            for blob_obj in bucket.list_blobs():
                blob_obj.delete()
            bucket.delete(force=True)
if __name__ == "__main__":
    # Script entry point: migrate the buckets configured at module level.
    GoogleCloudStorageMigrationHelper(
        buckets_to_migrate=BUCKETS,
        new_location=NEW_BUCKET_LOCATION,
        new_project=NEW_BUCKET_PROJECT,
    ).migrate()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment