Created
April 20, 2016 16:36
-
-
Save cleverdevil/d520da03614fd711326d12f71cfcc95a to your computer and use it in GitHub Desktop.
Quick parallelized script to delete all objects in a DreamObjects (S3) bucket
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto
import sys
import time
from multiprocessing import Event, Process, Queue

try:
    from queue import Empty  # Python 3
except ImportError:
    from Queue import Empty  # Python 2
class Worker(Process):
    """Background process that drains batches of S3 keys from a shared
    queue and bulk-deletes them from the bucket.

    Parameters
    ----------
    me : int
        Worker id, used only in log output.
    q : multiprocessing.Queue
        Shared queue of key batches (lists of boto Key objects).
    bucketname : str
        Name of the bucket to delete from.
    conn : boto S3 connection, optional
        Connection to use. Defaults to the module-level global ``s3``
        (the original implicitly depended on that global; the parameter
        makes the dependency explicit while staying call-compatible).
    """

    def __init__(self, me, q, bucketname, conn=None):
        self.me = me
        self.q = q
        self.running = True  # kept for backward compatibility; not shared across processes
        # A multiprocessing.Event IS shared between parent and child, so
        # finish() called in the parent actually stops the child's loop.
        # (The original set a plain attribute after start(); Process
        # attributes are not shared across the fork, so the child never
        # saw it and ran forever.)
        self._stop = Event()
        if conn is None:
            conn = s3  # module-level global, as in the original
        # validate=False avoids an extra round-trip per worker.
        self.bucket = conn.get_bucket(bucketname, validate=False)
        Process.__init__(self)

    def run(self):
        """Delete key batches until finish() is signalled AND the queue is
        drained, so batches queued just before shutdown are not abandoned."""
        while True:
            try:
                keys = self.q.get(False)
            except Empty:
                if self._stop.is_set():
                    break
                print('Worker %s waiting...' % self.me)
                time.sleep(1)
            else:
                print('Worker %s deleting %s objects...' % (self.me, len(keys)))
                self.bucket.delete_keys(keys)
                print('Worker %s done!' % self.me)

    def finish(self):
        """Signal this worker to exit once the queue is empty."""
        self.running = False  # legacy flag, see __init__
        self._stop.set()
def clear_bucket(s3, bucketname):
    """Delete every object in *bucketname* using a pool of worker processes.

    The parent lists keys and hands them to 10 workers in batches of 100
    via a shared queue; each worker issues bulk delete_keys() calls.

    Parameters
    ----------
    s3 : boto S3 connection
    bucketname : str
    """
    batch_size = 100
    bucket = s3.get_bucket(bucketname, validate=False)
    queue = Queue()
    workers = [Worker(i, queue, bucketname) for i in range(10)]
    for worker in workers:
        worker.start()

    keys = []
    for key in bucket.list():
        # Append BEFORE checking the batch size: the original queued the
        # batch in an else-branch and silently dropped the key that
        # triggered the flush.
        keys.append(key)
        if len(keys) >= batch_size:
            queue.put(keys)
            keys = []
            # Crude back-pressure so listing doesn't race far ahead of the
            # deleters. NOTE(review): Queue.qsize() can raise
            # NotImplementedError on macOS — acceptable for a one-off
            # script; confirm if this must run there.
            if queue.qsize() > 30:
                print('Queue full...')
                time.sleep(5)

    # The original never flushed the last partial batch, leaving up to
    # batch_size - 1 objects undeleted.
    if keys:
        queue.put(keys)

    for worker in workers:
        worker.finish()
    # Wait for the workers to drain the queue and exit.
    for worker in workers:
        worker.join()
if __name__ == '__main__':
    # Guard the entry point so worker processes started with the 'spawn'
    # method (Windows/macOS default) do not re-run the connect/clear on
    # re-import of this module.
    s3 = boto.connect_s3(host='objects-us-west-1.dream.io')
    clear_bucket(s3, 'YOUR-BUCKET-NAME')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script is broken under Python 3.
I tried fixing it, but it should probably be migrated to boto3 as well.
I will try ruby instead https://stackoverflow.com/questions/48490170/ruby-delete-all-s3-objects-with-prefix-using-one-request